blob: 33630ebacc4cb8c98508b0102b9bb9d04bda78c5 [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010049#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
/*
 * Bit flags accepted by the 'flags' argument of the internal
 * bdrv_co_do_readv()/bdrv_co_do_writev() helpers declared below.
 * Each enumerator occupies its own bit so that values can be OR-ed together.
 */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 1 << 0,
    BDRV_REQ_ZERO_WRITE   = 1 << 1,
} BdrvRequestFlags;
55
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020056static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000057static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000059 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000060static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000062 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020063static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010069static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000070 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010072static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000073 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010081 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010082static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000083
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080084static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
85 bool is_write, double elapsed_time, uint64_t *wait);
86static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
87 double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
89 bool is_write, int64_t *wait);
90
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010091static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000093
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010094static QLIST_HEAD(, BlockDriver) bdrv_drivers =
95 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000096
Markus Armbrusterf9092b12010-06-25 10:33:39 +020097/* The device to use for VM snapshots */
98static BlockDriverState *bs_snapshots;
99
Markus Armbrustereb852012009-10-27 18:41:44 +0100100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000103#ifdef _WIN32
/*
 * Return 1 if filename begins with an ASCII letter immediately followed by
 * ':' (for example "c:"), otherwise 0.  The second character is only
 * inspected when the first one is a letter.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
110
/*
 * Return 1 if filename is exactly a drive specification such as "d:", or if
 * it starts with one of the two device prefixes tested below; return 0
 * otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* bare drive: letter, colon, end of string */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* device namespace prefix, written with backslashes or forward slashes */
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
121#endif
122
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800123/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800124void bdrv_io_limits_disable(BlockDriverState *bs)
125{
126 bs->io_limits_enabled = false;
127
128 while (qemu_co_queue_next(&bs->throttled_reqs));
129
130 if (bs->block_timer) {
131 qemu_del_timer(bs->block_timer);
132 qemu_free_timer(bs->block_timer);
133 bs->block_timer = NULL;
134 }
135
136 bs->slice_start = 0;
137 bs->slice_end = 0;
138 bs->slice_time = 0;
139 memset(&bs->io_base, 0, sizeof(bs->io_base));
140}
141
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800142static void bdrv_block_timer(void *opaque)
143{
144 BlockDriverState *bs = opaque;
145
146 qemu_co_queue_next(&bs->throttled_reqs);
147}
148
149void bdrv_io_limits_enable(BlockDriverState *bs)
150{
151 qemu_co_queue_init(&bs->throttled_reqs);
152 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
153 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
154 bs->slice_start = qemu_get_clock_ns(vm_clock);
155 bs->slice_end = bs->slice_start + bs->slice_time;
156 memset(&bs->io_base, 0, sizeof(bs->io_base));
157 bs->io_limits_enabled = true;
158}
159
160bool bdrv_io_limits_enabled(BlockDriverState *bs)
161{
162 BlockIOLimit *io_limits = &bs->io_limits;
163 return io_limits->bps[BLOCK_IO_LIMIT_READ]
164 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
165 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
166 || io_limits->iops[BLOCK_IO_LIMIT_READ]
167 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
168 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
169}
170
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800171static void bdrv_io_limits_intercept(BlockDriverState *bs,
172 bool is_write, int nb_sectors)
173{
174 int64_t wait_time = -1;
175
176 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
177 qemu_co_queue_wait(&bs->throttled_reqs);
178 }
179
180 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
181 * throttled requests will not be dequeued until the current request is
182 * allowed to be serviced. So if the current request still exceeds the
183 * limits, it will be inserted to the head. All requests followed it will
184 * be still in throttled_reqs queue.
185 */
186
187 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
188 qemu_mod_timer(bs->block_timer,
189 wait_time + qemu_get_clock_ns(vm_clock));
190 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
191 }
192
193 qemu_co_queue_next(&bs->throttled_reqs);
194}
195
/*
 * Check whether path carries a "<protocol>:" prefix.
 *
 * Any ':' anywhere in the string counts.  On Windows, drive specifications
 * and device paths are excluded first because they contain a colon without
 * naming a protocol.  Returns 1 if a protocol is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    if (strchr(path, ':') == NULL) {
        return 0;
    }
    return 1;
}
208
/*
 * Return non-zero if path is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped, and the
 * path is absolute when the character that follows is a directory separator.
 * On Windows a backslash is accepted as a separator too, and a path that
 * itself starts with a separator is always absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
228
/*
 * Build in dest (capacity dest_size bytes) the path of filename, interpreted
 * relative to base_path.
 *
 * An absolute filename is copied as is.  Otherwise the directory part of
 * base_path is kept, meaning everything up to and including the last
 * directory separator or the first ':', whichever ends later, and filename
 * is appended to it.  The ':' handling is what makes URL-like base paths
 * work.  The result is truncated to fit dest; nothing is written when
 * dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end;
    const char *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the protocol colon, or start of string */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    /* position just past the last directory separator, or start of string */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep the longer of the two prefixes */
    if (after_sep > prefix_end) {
        prefix_end = after_sep;
    }

    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
272
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500273void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000274{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100275 /* Block drivers without coroutine functions need emulation */
276 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200277 bdrv->bdrv_co_readv = bdrv_co_readv_em;
278 bdrv->bdrv_co_writev = bdrv_co_writev_em;
279
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100280 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
281 * the block driver lacks aio we need to emulate that too.
282 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200283 if (!bdrv->bdrv_aio_readv) {
284 /* add AIO emulation layer */
285 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
286 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200287 }
bellard83f64092006-08-01 16:21:11 +0000288 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200289
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100290 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000291}
bellardb3380822004-03-14 21:38:54 +0000292
293/* create a new block device (by default it is empty) */
294BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000295{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100296 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000297
Anthony Liguori7267c092011-08-20 22:09:37 -0500298 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000299 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000300 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100301 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000302 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300303 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000304 return bs;
305}
306
bellardea2384d2004-08-01 21:59:26 +0000307BlockDriver *bdrv_find_format(const char *format_name)
308{
309 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100310 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
311 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000312 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100313 }
bellardea2384d2004-08-01 21:59:26 +0000314 }
315 return NULL;
316}
317
Markus Armbrustereb852012009-10-27 18:41:44 +0100318static int bdrv_is_whitelisted(BlockDriver *drv)
319{
320 static const char *whitelist[] = {
321 CONFIG_BDRV_WHITELIST
322 };
323 const char **p;
324
325 if (!whitelist[0])
326 return 1; /* no whitelist, anything goes */
327
328 for (p = whitelist; *p; p++) {
329 if (!strcmp(drv->format_name, *p)) {
330 return 1;
331 }
332 }
333 return 0;
334}
335
336BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
337{
338 BlockDriver *drv = bdrv_find_format(format_name);
339 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
340}
341
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200342int bdrv_create(BlockDriver *drv, const char* filename,
343 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000344{
345 if (!drv->bdrv_create)
346 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200347
348 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000349}
350
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
352{
353 BlockDriver *drv;
354
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900355 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200356 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000357 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200358 }
359
360 return bdrv_create(drv, filename, options);
361}
362
bellardd5249392004-08-03 21:14:23 +0000363#ifdef _WIN32
bellard95389c82005-12-18 18:28:15 +0000364void get_tmp_filename(char *filename, int size)
bellardd5249392004-08-03 21:14:23 +0000365{
bellard3b9f94e2007-01-07 17:27:07 +0000366 char temp_dir[MAX_PATH];
ths3b46e622007-09-17 08:09:54 +0000367
bellard3b9f94e2007-01-07 17:27:07 +0000368 GetTempPath(MAX_PATH, temp_dir);
369 GetTempFileName(temp_dir, "qem", 0, filename);
bellardd5249392004-08-03 21:14:23 +0000370}
371#else
/*
 * Create an empty temporary file and store its name in filename.
 *
 * The file is placed in $TMPDIR, or in /tmp when TMPDIR is unset.  size is
 * the capacity of filename in bytes; nothing is done when it is not
 * positive.  If the file cannot be created (including when the buffer is too
 * small for the template), filename is set to the empty string instead of
 * being left holding an unusable template.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int fd;

    if (size <= 0) {
        return;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    /*
     * XXX: race condition possible: the descriptor is closed right away and
     * callers reopen the file by name.
     */
    fd = mkstemp(filename);
    if (fd < 0) {
        /* do not call close(-1) and do not hand back a bogus name */
        filename[0] = '\0';
        return;
    }
    close(fd);
}
bellardd5249392004-08-03 21:14:23 +0000384#endif
bellardea2384d2004-08-01 21:59:26 +0000385
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200386/*
387 * Detect host devices. By convention, /dev/cdrom[N] is always
388 * recognized as a host CDROM.
389 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200390static BlockDriver *find_hdev_driver(const char *filename)
391{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200392 int score_max = 0, score;
393 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200394
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100395 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200396 if (d->bdrv_probe_device) {
397 score = d->bdrv_probe_device(filename);
398 if (score > score_max) {
399 score_max = score;
400 drv = d;
401 }
402 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200403 }
404
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200405 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200406}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200407
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900408BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200409{
410 BlockDriver *drv1;
411 char protocol[128];
412 int len;
413 const char *p;
414
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200415 /* TODO Drivers without bdrv_file_open must be specified explicitly */
416
Christoph Hellwig39508e72010-06-23 12:25:17 +0200417 /*
418 * XXX(hch): we really should not let host device detection
419 * override an explicit protocol specification, but moving this
420 * later breaks access to device names with colons in them.
421 * Thanks to the brain-dead persistent naming schemes on udev-
422 * based Linux systems those actually are quite common.
423 */
424 drv1 = find_hdev_driver(filename);
425 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200426 return drv1;
427 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200428
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000429 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200430 return bdrv_find_format("file");
431 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000432 p = strchr(filename, ':');
433 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200434 len = p - filename;
435 if (len > sizeof(protocol) - 1)
436 len = sizeof(protocol) - 1;
437 memcpy(protocol, filename, len);
438 protocol[len] = '\0';
439 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
440 if (drv1->protocol_name &&
441 !strcmp(drv1->protocol_name, protocol)) {
442 return drv1;
443 }
444 }
445 return NULL;
446}
447
Stefan Weilc98ac352010-07-21 21:51:51 +0200448static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000449{
bellard83f64092006-08-01 16:21:11 +0000450 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000451 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000452 uint8_t buf[2048];
453 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000454
Naphtali Spreif5edb012010-01-17 16:48:13 +0200455 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200456 if (ret < 0) {
457 *pdrv = NULL;
458 return ret;
459 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700460
Kevin Wolf08a00552010-06-01 18:37:31 +0200461 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
462 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200464 drv = bdrv_find_format("raw");
465 if (!drv) {
466 ret = -ENOENT;
467 }
468 *pdrv = drv;
469 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700470 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700471
bellard83f64092006-08-01 16:21:11 +0000472 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
473 bdrv_delete(bs);
474 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200475 *pdrv = NULL;
476 return ret;
bellard83f64092006-08-01 16:21:11 +0000477 }
478
bellardea2384d2004-08-01 21:59:26 +0000479 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200480 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100481 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000482 if (drv1->bdrv_probe) {
483 score = drv1->bdrv_probe(buf, ret, filename);
484 if (score > score_max) {
485 score_max = score;
486 drv = drv1;
487 }
bellardea2384d2004-08-01 21:59:26 +0000488 }
489 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200490 if (!drv) {
491 ret = -ENOENT;
492 }
493 *pdrv = drv;
494 return ret;
bellardea2384d2004-08-01 21:59:26 +0000495}
496
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100497/**
498 * Set the current 'total_sectors' value
499 */
500static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
501{
502 BlockDriver *drv = bs->drv;
503
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700504 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
505 if (bs->sg)
506 return 0;
507
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100508 /* query actual device if possible, otherwise just trust the hint */
509 if (drv->bdrv_getlength) {
510 int64_t length = drv->bdrv_getlength(bs);
511 if (length < 0) {
512 return length;
513 }
514 hint = length >> BDRV_SECTOR_BITS;
515 }
516
517 bs->total_sectors = hint;
518 return 0;
519}
520
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100521/**
522 * Set open flags for a given cache mode
523 *
524 * Return 0 on success, -1 if the cache mode was invalid.
525 */
526int bdrv_parse_cache_flags(const char *mode, int *flags)
527{
528 *flags &= ~BDRV_O_CACHE_MASK;
529
530 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
531 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100532 } else if (!strcmp(mode, "directsync")) {
533 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100534 } else if (!strcmp(mode, "writeback")) {
535 *flags |= BDRV_O_CACHE_WB;
536 } else if (!strcmp(mode, "unsafe")) {
537 *flags |= BDRV_O_CACHE_WB;
538 *flags |= BDRV_O_NO_FLUSH;
539 } else if (!strcmp(mode, "writethrough")) {
540 /* this is the default */
541 } else {
542 return -1;
543 }
544
545 return 0;
546}
547
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000548/**
549 * The copy-on-read flag is actually a reference count so multiple users may
550 * use the feature without worrying about clobbering its previous state.
551 * Copy-on-read stays enabled until all users have called to disable it.
552 */
553void bdrv_enable_copy_on_read(BlockDriverState *bs)
554{
555 bs->copy_on_read++;
556}
557
558void bdrv_disable_copy_on_read(BlockDriverState *bs)
559{
560 assert(bs->copy_on_read > 0);
561 bs->copy_on_read--;
562}
563
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200564/*
Kevin Wolf57915332010-04-14 15:24:50 +0200565 * Common part for opening disk images and files
566 */
567static int bdrv_open_common(BlockDriverState *bs, const char *filename,
568 int flags, BlockDriver *drv)
569{
570 int ret, open_flags;
571
572 assert(drv != NULL);
573
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100574 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
575
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200576 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100577 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200578 bs->encrypted = 0;
579 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100580 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200581 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100582 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200583 bs->buffer_alignment = 512;
584
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000585 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
586 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
587 bdrv_enable_copy_on_read(bs);
588 }
589
Kevin Wolf57915332010-04-14 15:24:50 +0200590 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100591 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200592
593 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
594 return -ENOTSUP;
595 }
596
597 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500598 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200599
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100600 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200601
602 /*
603 * Clear flags that are internal to the block layer before opening the
604 * image.
605 */
606 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
607
608 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200609 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200610 */
611 if (bs->is_temporary) {
612 open_flags |= BDRV_O_RDWR;
613 }
614
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100615 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
616
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200617 /* Open the image, either directly or using a protocol */
618 if (drv->bdrv_file_open) {
619 ret = drv->bdrv_file_open(bs, filename, open_flags);
620 } else {
621 ret = bdrv_file_open(&bs->file, filename, open_flags);
622 if (ret >= 0) {
623 ret = drv->bdrv_open(bs, open_flags);
624 }
625 }
626
Kevin Wolf57915332010-04-14 15:24:50 +0200627 if (ret < 0) {
628 goto free_and_fail;
629 }
630
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100631 ret = refresh_total_sectors(bs, bs->total_sectors);
632 if (ret < 0) {
633 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200634 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100635
Kevin Wolf57915332010-04-14 15:24:50 +0200636#ifndef _WIN32
637 if (bs->is_temporary) {
638 unlink(filename);
639 }
640#endif
641 return 0;
642
643free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200644 if (bs->file) {
645 bdrv_delete(bs->file);
646 bs->file = NULL;
647 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500648 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200649 bs->opaque = NULL;
650 bs->drv = NULL;
651 return ret;
652}
653
654/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200655 * Opens a file using a protocol (file, host_device, nbd, ...)
656 */
bellard83f64092006-08-01 16:21:11 +0000657int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000658{
bellard83f64092006-08-01 16:21:11 +0000659 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200660 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000661 int ret;
662
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900663 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200664 if (!drv) {
665 return -ENOENT;
666 }
667
bellard83f64092006-08-01 16:21:11 +0000668 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200669 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000670 if (ret < 0) {
671 bdrv_delete(bs);
672 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000673 }
aliguori71d07702009-03-03 17:37:16 +0000674 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000675 *pbs = bs;
676 return 0;
bellardea2384d2004-08-01 21:59:26 +0000677}
bellardfc01f7e2003-06-30 10:03:06 +0000678
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200679/*
680 * Opens a disk image (raw, qcow2, vmdk, ...)
681 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200682int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
683 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000684{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200685 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200686 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000687
bellard83f64092006-08-01 16:21:11 +0000688 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000689 BlockDriverState *bs1;
690 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000691 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200692 BlockDriver *bdrv_qcow2;
693 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200694 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000695
bellardea2384d2004-08-01 21:59:26 +0000696 /* if snapshot, we create a temporary backing file and open it
697 instead of opening 'filename' directly */
698
699 /* if there is a backing file, use it */
700 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200701 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000702 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000703 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000704 return ret;
bellardea2384d2004-08-01 21:59:26 +0000705 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200706 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000707
708 if (bs1->drv && bs1->drv->protocol_name)
709 is_protocol = 1;
710
bellardea2384d2004-08-01 21:59:26 +0000711 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000712
bellardea2384d2004-08-01 21:59:26 +0000713 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000714
715 /* Real path is meaningless for protocols */
716 if (is_protocol)
717 snprintf(backing_filename, sizeof(backing_filename),
718 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000719 else if (!realpath(filename, backing_filename))
720 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000721
Kevin Wolf91a073a2009-05-27 14:48:06 +0200722 bdrv_qcow2 = bdrv_find_format("qcow2");
723 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
724
Jes Sorensen3e829902010-05-27 16:20:30 +0200725 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200726 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
727 if (drv) {
728 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
729 drv->format_name);
730 }
731
732 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200733 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000734 if (ret < 0) {
735 return ret;
bellardea2384d2004-08-01 21:59:26 +0000736 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200737
bellardea2384d2004-08-01 21:59:26 +0000738 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200739 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000740 bs->is_temporary = 1;
741 }
bellard712e7872005-04-28 21:09:32 +0000742
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200743 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200744 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200745 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000746 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100747
aliguori51d7c002009-03-05 23:00:29 +0000748 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000749 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000750 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751
752 /* Open the image */
753 ret = bdrv_open_common(bs, filename, flags, drv);
754 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100755 goto unlink_and_fail;
756 }
757
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200758 /* If there is a backing file, use it */
759 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
760 char backing_filename[PATH_MAX];
761 int back_flags;
762 BlockDriver *back_drv = NULL;
763
764 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000765
766 if (path_has_protocol(bs->backing_file)) {
767 pstrcpy(backing_filename, sizeof(backing_filename),
768 bs->backing_file);
769 } else {
770 path_combine(backing_filename, sizeof(backing_filename),
771 filename, bs->backing_file);
772 }
773
774 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200775 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000776 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200777
778 /* backing files always opened read-only */
779 back_flags =
780 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
781
782 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
783 if (ret < 0) {
784 bdrv_close(bs);
785 return ret;
786 }
787 if (bs->is_temporary) {
788 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
789 } else {
790 /* base image inherits from "parent" */
791 bs->backing_hd->keep_read_only = bs->keep_read_only;
792 }
793 }
794
795 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200796 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200797 }
798
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800799 /* throttling disk I/O limits */
800 if (bs->io_limits_enabled) {
801 bdrv_io_limits_enable(bs);
802 }
803
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200804 return 0;
805
806unlink_and_fail:
807 if (bs->is_temporary) {
808 unlink(filename);
809 }
810 return ret;
811}
812
bellardfc01f7e2003-06-30 10:03:06 +0000813void bdrv_close(BlockDriverState *bs)
814{
bellard19cb3732006-08-19 11:45:59 +0000815 if (bs->drv) {
Paolo Bonzini3e914652012-03-30 13:17:11 +0200816 if (bs->job) {
817 block_job_cancel_sync(bs->job);
818 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200819 if (bs == bs_snapshots) {
820 bs_snapshots = NULL;
821 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100822 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000823 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100824 bs->backing_hd = NULL;
825 }
bellardea2384d2004-08-01 21:59:26 +0000826 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500827 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000828#ifdef _WIN32
829 if (bs->is_temporary) {
830 unlink(bs->filename);
831 }
bellard67b915a2004-03-31 23:37:16 +0000832#endif
bellardea2384d2004-08-01 21:59:26 +0000833 bs->opaque = NULL;
834 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000835 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000836
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200837 if (bs->file != NULL) {
838 bdrv_close(bs->file);
839 }
840
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200841 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000842 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800843
844 /*throttling disk I/O limits*/
845 if (bs->io_limits_enabled) {
846 bdrv_io_limits_disable(bs);
847 }
bellardb3380822004-03-14 21:38:54 +0000848}
849
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900850void bdrv_close_all(void)
851{
852 BlockDriverState *bs;
853
854 QTAILQ_FOREACH(bs, &bdrv_states, list) {
855 bdrv_close(bs);
856 }
857}
858
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000859/*
860 * Wait for pending requests to complete across all BlockDriverStates
861 *
862 * This function does not flush data to disk, use bdrv_flush_all() for that
863 * after calling this function.
864 */
865void bdrv_drain_all(void)
866{
867 BlockDriverState *bs;
868
869 qemu_aio_flush();
870
871 /* If requests are still pending there is a bug somewhere */
872 QTAILQ_FOREACH(bs, &bdrv_states, list) {
873 assert(QLIST_EMPTY(&bs->tracked_requests));
874 assert(qemu_co_queue_empty(&bs->throttled_reqs));
875 }
876}
877
Ryan Harperd22b2f42011-03-29 20:51:47 -0500878/* make a BlockDriverState anonymous by removing from bdrv_state list.
879 Also, NULL terminate the device_name to prevent double remove */
880void bdrv_make_anon(BlockDriverState *bs)
881{
882 if (bs->device_name[0] != '\0') {
883 QTAILQ_REMOVE(&bdrv_states, bs, list);
884 }
885 bs->device_name[0] = '\0';
886}
887
Jeff Cody8802d1f2012-02-28 15:54:06 -0500888/*
889 * Add new bs contents at the top of an image chain while the chain is
890 * live, while keeping required fields on the top layer.
891 *
892 * This will modify the BlockDriverState fields, and swap contents
893 * between bs_new and bs_top. Both bs_new and bs_top are modified.
894 *
895 * This function does not create any image files.
896 */
897void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
898{
899 BlockDriverState tmp;
900
901 /* the new bs must not be in bdrv_states */
902 bdrv_make_anon(bs_new);
903
904 tmp = *bs_new;
905
906 /* there are some fields that need to stay on the top layer: */
907
908 /* dev info */
909 tmp.dev_ops = bs_top->dev_ops;
910 tmp.dev_opaque = bs_top->dev_opaque;
911 tmp.dev = bs_top->dev;
912 tmp.buffer_alignment = bs_top->buffer_alignment;
913 tmp.copy_on_read = bs_top->copy_on_read;
914
915 /* i/o timing parameters */
916 tmp.slice_time = bs_top->slice_time;
917 tmp.slice_start = bs_top->slice_start;
918 tmp.slice_end = bs_top->slice_end;
919 tmp.io_limits = bs_top->io_limits;
920 tmp.io_base = bs_top->io_base;
921 tmp.throttled_reqs = bs_top->throttled_reqs;
922 tmp.block_timer = bs_top->block_timer;
923 tmp.io_limits_enabled = bs_top->io_limits_enabled;
924
925 /* geometry */
926 tmp.cyls = bs_top->cyls;
927 tmp.heads = bs_top->heads;
928 tmp.secs = bs_top->secs;
929 tmp.translation = bs_top->translation;
930
931 /* r/w error */
932 tmp.on_read_error = bs_top->on_read_error;
933 tmp.on_write_error = bs_top->on_write_error;
934
935 /* i/o status */
936 tmp.iostatus_enabled = bs_top->iostatus_enabled;
937 tmp.iostatus = bs_top->iostatus;
938
939 /* keep the same entry in bdrv_states */
940 pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
941 tmp.list = bs_top->list;
942
943 /* The contents of 'tmp' will become bs_top, as we are
944 * swapping bs_new and bs_top contents. */
945 tmp.backing_hd = bs_new;
946 pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
947
948 /* swap contents of the fixed new bs and the current top */
949 *bs_new = *bs_top;
950 *bs_top = tmp;
951
952 /* clear the copied fields in the new backing file */
953 bdrv_detach_dev(bs_new, bs_new->dev);
954
955 qemu_co_queue_init(&bs_new->throttled_reqs);
956 memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
957 memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
958 bdrv_iostatus_disable(bs_new);
959
960 /* we don't use bdrv_io_limits_disable() for this, because we don't want
961 * to affect or delete the block_timer, as it has been moved to bs_top */
962 bs_new->io_limits_enabled = false;
963 bs_new->block_timer = NULL;
964 bs_new->slice_time = 0;
965 bs_new->slice_start = 0;
966 bs_new->slice_end = 0;
967}
968
bellardb3380822004-03-14 21:38:54 +0000969void bdrv_delete(BlockDriverState *bs)
970{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200971 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +0200972 assert(!bs->job);
973 assert(!bs->in_use);
Markus Armbruster18846de2010-06-29 16:58:30 +0200974
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100975 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500976 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000977
bellardb3380822004-03-14 21:38:54 +0000978 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200979 if (bs->file != NULL) {
980 bdrv_delete(bs->file);
981 }
982
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200983 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500984 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000985}
986
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200987int bdrv_attach_dev(BlockDriverState *bs, void *dev)
988/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200989{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200990 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +0200991 return -EBUSY;
992 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200993 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -0300994 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +0200995 return 0;
996}
997
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200998/* TODO qdevified devices don't use this, remove when devices are qdevified */
999void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02001000{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001001 if (bdrv_attach_dev(bs, dev) < 0) {
1002 abort();
1003 }
1004}
1005
1006void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1007/* TODO change to DeviceState *dev when all users are qdevified */
1008{
1009 assert(bs->dev == dev);
1010 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001011 bs->dev_ops = NULL;
1012 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +02001013 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02001014}
1015
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001016/* TODO change to return DeviceState * when all users are qdevified */
1017void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02001018{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001019 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02001020}
1021
Markus Armbruster0e49de52011-08-03 15:07:41 +02001022void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1023 void *opaque)
1024{
1025 bs->dev_ops = ops;
1026 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001027 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1028 bs_snapshots = NULL;
1029 }
Markus Armbruster0e49de52011-08-03 15:07:41 +02001030}
1031
Luiz Capitulino329c0a42012-01-25 16:59:43 -02001032void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1033 BlockQMPEventAction action, int is_read)
1034{
1035 QObject *data;
1036 const char *action_str;
1037
1038 switch (action) {
1039 case BDRV_ACTION_REPORT:
1040 action_str = "report";
1041 break;
1042 case BDRV_ACTION_IGNORE:
1043 action_str = "ignore";
1044 break;
1045 case BDRV_ACTION_STOP:
1046 action_str = "stop";
1047 break;
1048 default:
1049 abort();
1050 }
1051
1052 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1053 bdrv->device_name,
1054 action_str,
1055 is_read ? "read" : "write");
1056 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1057
1058 qobject_decref(data);
1059}
1060
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001061static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1062{
1063 QObject *data;
1064
1065 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1066 bdrv_get_device_name(bs), ejected);
1067 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1068
1069 qobject_decref(data);
1070}
1071
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001072static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02001073{
Markus Armbruster145feb12011-08-03 15:07:42 +02001074 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001075 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001076 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001077 if (tray_was_closed) {
1078 /* tray open */
1079 bdrv_emit_qmp_eject_event(bs, true);
1080 }
1081 if (load) {
1082 /* tray close */
1083 bdrv_emit_qmp_eject_event(bs, false);
1084 }
Markus Armbruster145feb12011-08-03 15:07:42 +02001085 }
1086}
1087
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001088bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1089{
1090 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1091}
1092
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01001093void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1094{
1095 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1096 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1097 }
1098}
1099
Markus Armbrustere4def802011-09-06 18:58:53 +02001100bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1101{
1102 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1103 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1104 }
1105 return false;
1106}
1107
Markus Armbruster145feb12011-08-03 15:07:42 +02001108static void bdrv_dev_resize_cb(BlockDriverState *bs)
1109{
1110 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1111 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02001112 }
1113}
1114
Markus Armbrusterf1076392011-09-06 18:58:46 +02001115bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1116{
1117 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1118 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1119 }
1120 return false;
1121}
1122
aliguorie97fc192009-04-21 23:11:50 +00001123/*
1124 * Run consistency checks on an image
1125 *
Kevin Wolfe076f332010-06-29 11:43:13 +02001126 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02001127 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02001128 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00001129 */
Kevin Wolfe076f332010-06-29 11:43:13 +02001130int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +00001131{
1132 if (bs->drv->bdrv_check == NULL) {
1133 return -ENOTSUP;
1134 }
1135
Kevin Wolfe076f332010-06-29 11:43:13 +02001136 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +02001137 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +00001138}
1139
Kevin Wolf8a426612010-07-16 17:17:01 +02001140#define COMMIT_BUF_SECTORS 2048
1141
bellard33e39632003-07-06 17:15:21 +00001142/* commit COW file into the raw image */
1143int bdrv_commit(BlockDriverState *bs)
1144{
bellard19cb3732006-08-19 11:45:59 +00001145 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +02001146 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001147 int64_t sector, total_sectors;
1148 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001149 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001150 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001151 char filename[1024];
1152 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +00001153
bellard19cb3732006-08-19 11:45:59 +00001154 if (!drv)
1155 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001156
1157 if (!bs->backing_hd) {
1158 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001159 }
1160
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001161 if (bs->backing_hd->keep_read_only) {
1162 return -EACCES;
1163 }
Kevin Wolfee181192010-08-05 13:05:22 +02001164
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00001165 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1166 return -EBUSY;
1167 }
1168
Kevin Wolfee181192010-08-05 13:05:22 +02001169 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001170 ro = bs->backing_hd->read_only;
1171 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1172 open_flags = bs->backing_hd->open_flags;
1173
1174 if (ro) {
1175 /* re-open as RW */
1176 bdrv_delete(bs->backing_hd);
1177 bs->backing_hd = NULL;
1178 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001179 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1180 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001181 if (rw_ret < 0) {
1182 bdrv_delete(bs_rw);
1183 /* try to re-open read-only */
1184 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001185 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1186 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001187 if (ret < 0) {
1188 bdrv_delete(bs_ro);
1189 /* drive not functional anymore */
1190 bs->drv = NULL;
1191 return ret;
1192 }
1193 bs->backing_hd = bs_ro;
1194 return rw_ret;
1195 }
1196 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001197 }
bellardea2384d2004-08-01 21:59:26 +00001198
Jan Kiszka6ea44302009-11-30 18:21:19 +01001199 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001200 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001201
Kevin Wolf8a426612010-07-16 17:17:01 +02001202 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001203 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001204
1205 if (bdrv_read(bs, sector, buf, n) != 0) {
1206 ret = -EIO;
1207 goto ro_cleanup;
1208 }
1209
1210 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1211 ret = -EIO;
1212 goto ro_cleanup;
1213 }
bellardea2384d2004-08-01 21:59:26 +00001214 }
1215 }
bellard95389c82005-12-18 18:28:15 +00001216
Christoph Hellwig1d449522010-01-17 12:32:30 +01001217 if (drv->bdrv_make_empty) {
1218 ret = drv->bdrv_make_empty(bs);
1219 bdrv_flush(bs);
1220 }
bellard95389c82005-12-18 18:28:15 +00001221
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001222 /*
1223 * Make sure all data we wrote to the backing device is actually
1224 * stable on disk.
1225 */
1226 if (bs->backing_hd)
1227 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001228
1229ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001230 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001231
1232 if (ro) {
1233 /* re-open as RO */
1234 bdrv_delete(bs->backing_hd);
1235 bs->backing_hd = NULL;
1236 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001237 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1238 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001239 if (ret < 0) {
1240 bdrv_delete(bs_ro);
1241 /* drive not functional anymore */
1242 bs->drv = NULL;
1243 return ret;
1244 }
1245 bs->backing_hd = bs_ro;
1246 bs->backing_hd->keep_read_only = 0;
1247 }
1248
Christoph Hellwig1d449522010-01-17 12:32:30 +01001249 return ret;
bellard33e39632003-07-06 17:15:21 +00001250}
1251
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001252int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001253{
1254 BlockDriverState *bs;
1255
1256 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001257 int ret = bdrv_commit(bs);
1258 if (ret < 0) {
1259 return ret;
1260 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001261 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001262 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001263}
1264
/* One in-flight request, linked into its BlockDriverState's
 * tracked_requests list by tracked_request_begin() */
struct BdrvTrackedRequest {
    BlockDriverState *bs;   /* state the request was issued on */
    int64_t sector_num;     /* first sector of the request */
    int nb_sectors;         /* number of sectors in the request */
    bool is_write;          /* true for a write request */
    QLIST_ENTRY(BdrvTrackedRequest) list; /* link in bs->tracked_requests */
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1274
1275/**
1276 * Remove an active request from the tracked requests list
1277 *
1278 * This function should be called when a tracked request is completing.
1279 */
1280static void tracked_request_end(BdrvTrackedRequest *req)
1281{
1282 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001283 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001284}
1285
1286/**
1287 * Add an active request to the tracked requests list
1288 */
1289static void tracked_request_begin(BdrvTrackedRequest *req,
1290 BlockDriverState *bs,
1291 int64_t sector_num,
1292 int nb_sectors, bool is_write)
1293{
1294 *req = (BdrvTrackedRequest){
1295 .bs = bs,
1296 .sector_num = sector_num,
1297 .nb_sectors = nb_sectors,
1298 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001299 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001300 };
1301
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001302 qemu_co_queue_init(&req->wait_queue);
1303
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001304 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1305}
1306
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001307/**
1308 * Round a region to cluster boundaries
1309 */
1310static void round_to_clusters(BlockDriverState *bs,
1311 int64_t sector_num, int nb_sectors,
1312 int64_t *cluster_sector_num,
1313 int *cluster_nb_sectors)
1314{
1315 BlockDriverInfo bdi;
1316
1317 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1318 *cluster_sector_num = sector_num;
1319 *cluster_nb_sectors = nb_sectors;
1320 } else {
1321 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1322 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1323 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1324 nb_sectors, c);
1325 }
1326}
1327
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001328static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1329 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001330 /* aaaa bbbb */
1331 if (sector_num >= req->sector_num + req->nb_sectors) {
1332 return false;
1333 }
1334 /* bbbb aaaa */
1335 if (req->sector_num >= sector_num + nb_sectors) {
1336 return false;
1337 }
1338 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001339}
1340
1341static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1342 int64_t sector_num, int nb_sectors)
1343{
1344 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001345 int64_t cluster_sector_num;
1346 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001347 bool retry;
1348
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001349 /* If we touch the same cluster it counts as an overlap. This guarantees
1350 * that allocating writes will be serialized and not race with each other
1351 * for the same cluster. For example, in copy-on-read it ensures that the
1352 * CoR read and write operations are atomic and guest writes cannot
1353 * interleave between them.
1354 */
1355 round_to_clusters(bs, sector_num, nb_sectors,
1356 &cluster_sector_num, &cluster_nb_sectors);
1357
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001358 do {
1359 retry = false;
1360 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001361 if (tracked_request_overlaps(req, cluster_sector_num,
1362 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001363 /* Hitting this means there was a reentrant request, for
1364 * example, a block driver issuing nested requests. This must
1365 * never happen since it means deadlock.
1366 */
1367 assert(qemu_coroutine_self() != req->co);
1368
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001369 qemu_co_queue_wait(&req->wait_queue);
1370 retry = true;
1371 break;
1372 }
1373 }
1374 } while (retry);
1375}
1376
Kevin Wolf756e6732010-01-12 12:55:17 +01001377/*
1378 * Return values:
1379 * 0 - success
1380 * -EINVAL - backing format specified, but no file
1381 * -ENOSPC - can't update the backing file because no space is left in the
1382 * image file header
1383 * -ENOTSUP - format driver doesn't support changing the backing file
1384 */
1385int bdrv_change_backing_file(BlockDriverState *bs,
1386 const char *backing_file, const char *backing_fmt)
1387{
1388 BlockDriver *drv = bs->drv;
1389
1390 if (drv->bdrv_change_backing_file != NULL) {
1391 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1392 } else {
1393 return -ENOTSUP;
1394 }
1395}
1396
aliguori71d07702009-03-03 17:37:16 +00001397static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1398 size_t size)
1399{
1400 int64_t len;
1401
1402 if (!bdrv_is_inserted(bs))
1403 return -ENOMEDIUM;
1404
1405 if (bs->growable)
1406 return 0;
1407
1408 len = bdrv_getlength(bs);
1409
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001410 if (offset < 0)
1411 return -EIO;
1412
1413 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001414 return -EIO;
1415
1416 return 0;
1417}
1418
1419static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1420 int nb_sectors)
1421{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001422 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1423 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001424}
1425
/* Arguments and result of one synchronous request run through
 * bdrv_rw_co_entry() */
typedef struct RwCo {
    BlockDriverState *bs;   /* state to operate on */
    int64_t sector_num;     /* first sector of the request */
    int nb_sectors;         /* number of sectors to transfer */
    QEMUIOVector *qiov;     /* I/O vector handed to the readv/writev call */
    bool is_write;          /* true: write, false: read */
    int ret;                /* result; bdrv_rw_co() sets it to NOT_DONE
                             * and polls until it changes */
} RwCo;
1434
1435static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1436{
1437 RwCo *rwco = opaque;
1438
1439 if (!rwco->is_write) {
1440 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001441 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001442 } else {
1443 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001444 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001445 }
1446}
1447
1448/*
1449 * Process a synchronous request using coroutines
1450 */
1451static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1452 int nb_sectors, bool is_write)
1453{
1454 QEMUIOVector qiov;
1455 struct iovec iov = {
1456 .iov_base = (void *)buf,
1457 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1458 };
1459 Coroutine *co;
1460 RwCo rwco = {
1461 .bs = bs,
1462 .sector_num = sector_num,
1463 .nb_sectors = nb_sectors,
1464 .qiov = &qiov,
1465 .is_write = is_write,
1466 .ret = NOT_DONE,
1467 };
1468
1469 qemu_iovec_init_external(&qiov, &iov, 1);
1470
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001471 /**
1472 * In sync call context, when the vcpu is blocked, this throttling timer
1473 * will not fire; so the I/O throttling function has to be disabled here
1474 * if it has been enabled.
1475 */
1476 if (bs->io_limits_enabled) {
1477 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1478 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1479 bdrv_io_limits_disable(bs);
1480 }
1481
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001482 if (qemu_in_coroutine()) {
1483 /* Fast-path if already in coroutine context */
1484 bdrv_rw_co_entry(&rwco);
1485 } else {
1486 co = qemu_coroutine_create(bdrv_rw_co_entry);
1487 qemu_coroutine_enter(co, &rwco);
1488 while (rwco.ret == NOT_DONE) {
1489 qemu_aio_wait();
1490 }
1491 }
1492 return rwco.ret;
1493}
1494
bellard19cb3732006-08-19 11:45:59 +00001495/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001496int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001497 uint8_t *buf, int nb_sectors)
1498{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001499 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001500}
1501
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001502static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001503 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001504{
1505 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001506 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001507
Jan Kiszka6ea44302009-11-30 18:21:19 +01001508 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001509 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001510
1511 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001512 idx = start / (sizeof(unsigned long) * 8);
1513 bit = start % (sizeof(unsigned long) * 8);
1514 val = bs->dirty_bitmap[idx];
1515 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001516 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001517 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001518 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001519 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001520 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001521 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001522 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001523 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001524 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001525 }
1526 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001527 }
1528}
1529
ths5fafdf22007-09-16 21:08:06 +00001530/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001531 -EIO generic I/O error (may happen for all errors)
1532 -ENOMEDIUM No media inserted.
1533 -EINVAL Invalid sector number or nb_sectors
1534 -EACCES Trying to write a read-only device
1535*/
ths5fafdf22007-09-16 21:08:06 +00001536int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001537 const uint8_t *buf, int nb_sectors)
1538{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001539 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001540}
1541
aliguorieda578e2009-03-12 19:57:16 +00001542int bdrv_pread(BlockDriverState *bs, int64_t offset,
1543 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001544{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001545 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001546 int len, nb_sectors, count;
1547 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001548 int ret;
bellard83f64092006-08-01 16:21:11 +00001549
1550 count = count1;
1551 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001552 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001553 if (len > count)
1554 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001555 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001556 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001557 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1558 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001559 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001560 count -= len;
1561 if (count == 0)
1562 return count1;
1563 sector_num++;
1564 buf += len;
1565 }
1566
1567 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001568 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001569 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001570 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1571 return ret;
bellard83f64092006-08-01 16:21:11 +00001572 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001573 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001574 buf += len;
1575 count -= len;
1576 }
1577
1578 /* add data from the last sector */
1579 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001580 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1581 return ret;
bellard83f64092006-08-01 16:21:11 +00001582 memcpy(buf, tmp_buf, count);
1583 }
1584 return count1;
1585}
1586
aliguorieda578e2009-03-12 19:57:16 +00001587int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1588 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001589{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001590 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001591 int len, nb_sectors, count;
1592 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001593 int ret;
bellard83f64092006-08-01 16:21:11 +00001594
1595 count = count1;
1596 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001597 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001598 if (len > count)
1599 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001600 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001601 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001602 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1603 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001604 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001605 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1606 return ret;
bellard83f64092006-08-01 16:21:11 +00001607 count -= len;
1608 if (count == 0)
1609 return count1;
1610 sector_num++;
1611 buf += len;
1612 }
1613
1614 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001615 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001616 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001617 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1618 return ret;
bellard83f64092006-08-01 16:21:11 +00001619 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001620 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001621 buf += len;
1622 count -= len;
1623 }
1624
1625 /* add data from the last sector */
1626 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001627 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1628 return ret;
bellard83f64092006-08-01 16:21:11 +00001629 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001630 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1631 return ret;
bellard83f64092006-08-01 16:21:11 +00001632 }
1633 return count1;
1634}
bellard83f64092006-08-01 16:21:11 +00001635
Kevin Wolff08145f2010-06-16 16:38:15 +02001636/*
1637 * Writes to the file and ensures that no writes are reordered across this
1638 * request (acts as a barrier)
1639 *
1640 * Returns 0 on success, -errno in error cases.
1641 */
1642int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1643 const void *buf, int count)
1644{
1645 int ret;
1646
1647 ret = bdrv_pwrite(bs, offset, buf, count);
1648 if (ret < 0) {
1649 return ret;
1650 }
1651
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001652 /* No flush needed for cache modes that use O_DSYNC */
1653 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001654 bdrv_flush(bs);
1655 }
1656
1657 return 0;
1658}
1659
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001660static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001661 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1662{
1663 /* Perform I/O through a temporary buffer so that users who scribble over
1664 * their read buffer while the operation is in progress do not end up
1665 * modifying the image file. This is critical for zero-copy guest I/O
1666 * where anything might happen inside guest memory.
1667 */
1668 void *bounce_buffer;
1669
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001670 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00001671 struct iovec iov;
1672 QEMUIOVector bounce_qiov;
1673 int64_t cluster_sector_num;
1674 int cluster_nb_sectors;
1675 size_t skip_bytes;
1676 int ret;
1677
1678 /* Cover entire cluster so no additional backing file I/O is required when
1679 * allocating cluster in the image file.
1680 */
1681 round_to_clusters(bs, sector_num, nb_sectors,
1682 &cluster_sector_num, &cluster_nb_sectors);
1683
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001684 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1685 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001686
1687 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1688 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1689 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1690
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001691 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1692 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001693 if (ret < 0) {
1694 goto err;
1695 }
1696
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001697 if (drv->bdrv_co_write_zeroes &&
1698 buffer_is_zero(bounce_buffer, iov.iov_len)) {
1699 ret = drv->bdrv_co_write_zeroes(bs, cluster_sector_num,
1700 cluster_nb_sectors);
1701 } else {
1702 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001703 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001704 }
1705
Stefan Hajnocziab185922011-11-17 13:40:31 +00001706 if (ret < 0) {
1707 /* It might be okay to ignore write errors for guest requests. If this
1708 * is a deliberate copy-on-read then we don't want to ignore the error.
1709 * Simply report it in all cases.
1710 */
1711 goto err;
1712 }
1713
1714 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1715 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1716 nb_sectors * BDRV_SECTOR_SIZE);
1717
1718err:
1719 qemu_vfree(bounce_buffer);
1720 return ret;
1721}
1722
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001723/*
1724 * Handle a read request in coroutine context
1725 */
1726static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001727 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1728 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001729{
1730 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001731 BdrvTrackedRequest req;
1732 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001733
Kevin Wolfda1fa912011-07-14 17:27:13 +02001734 if (!drv) {
1735 return -ENOMEDIUM;
1736 }
1737 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1738 return -EIO;
1739 }
1740
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001741 /* throttling disk read I/O */
1742 if (bs->io_limits_enabled) {
1743 bdrv_io_limits_intercept(bs, false, nb_sectors);
1744 }
1745
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001746 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001747 flags |= BDRV_REQ_COPY_ON_READ;
1748 }
1749 if (flags & BDRV_REQ_COPY_ON_READ) {
1750 bs->copy_on_read_in_flight++;
1751 }
1752
1753 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001754 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1755 }
1756
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001757 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001758
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001759 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00001760 int pnum;
1761
1762 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1763 if (ret < 0) {
1764 goto out;
1765 }
1766
1767 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001768 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001769 goto out;
1770 }
1771 }
1772
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001773 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001774
1775out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001776 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001777
1778 if (flags & BDRV_REQ_COPY_ON_READ) {
1779 bs->copy_on_read_in_flight--;
1780 }
1781
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001782 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001783}
1784
/*
 * Coroutine read entry point: emits the bdrv_co_readv trace event and
 * forwards the request to bdrv_co_do_readv() with no request flags set.
 *
 * Returns the value returned by bdrv_co_do_readv().
 */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}
1792
/*
 * Coroutine read entry point that always requests copy-on-read: emits the
 * bdrv_co_copy_on_readv trace event and forwards the request to
 * bdrv_co_do_readv() with BDRV_REQ_COPY_ON_READ set.
 *
 * Returns the value returned by bdrv_co_do_readv().
 */
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_COPY_ON_READ);
}
1801
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001802static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1803 int64_t sector_num, int nb_sectors)
1804{
1805 BlockDriver *drv = bs->drv;
1806 QEMUIOVector qiov;
1807 struct iovec iov;
1808 int ret;
1809
1810 /* First try the efficient write zeroes operation */
1811 if (drv->bdrv_co_write_zeroes) {
1812 return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1813 }
1814
1815 /* Fall back to bounce buffer if write zeroes is unsupported */
1816 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1817 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1818 memset(iov.iov_base, 0, iov.iov_len);
1819 qemu_iovec_init_external(&qiov, &iov, 1);
1820
1821 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1822
1823 qemu_vfree(iov.iov_base);
1824 return ret;
1825}
1826
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001827/*
1828 * Handle a write request in coroutine context
1829 */
1830static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001831 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1832 BdrvRequestFlags flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001833{
1834 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001835 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001836 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001837
1838 if (!bs->drv) {
1839 return -ENOMEDIUM;
1840 }
1841 if (bs->read_only) {
1842 return -EACCES;
1843 }
1844 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1845 return -EIO;
1846 }
1847
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001848 /* throttling disk write I/O */
1849 if (bs->io_limits_enabled) {
1850 bdrv_io_limits_intercept(bs, true, nb_sectors);
1851 }
1852
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001853 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001854 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1855 }
1856
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001857 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1858
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001859 if (flags & BDRV_REQ_ZERO_WRITE) {
1860 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1861 } else {
1862 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1863 }
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001864
Kevin Wolfda1fa912011-07-14 17:27:13 +02001865 if (bs->dirty_bitmap) {
1866 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1867 }
1868
1869 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1870 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1871 }
1872
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001873 tracked_request_end(&req);
1874
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001875 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001876}
1877
/*
 * Coroutine write entry point: emits the bdrv_co_writev trace event and
 * forwards the request to bdrv_co_do_writev() with no request flags set.
 *
 * Returns the value returned by bdrv_co_do_writev().
 */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
}
1885
/*
 * Coroutine entry point for zeroing a sector range: emits the
 * bdrv_co_write_zeroes trace event and forwards the request to
 * bdrv_co_do_writev() with BDRV_REQ_ZERO_WRITE set and no data vector.
 *
 * Returns the value returned by bdrv_co_do_writev().
 */
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors)
{
    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
                             BDRV_REQ_ZERO_WRITE);
}
1894
bellard83f64092006-08-01 16:21:11 +00001895/**
bellard83f64092006-08-01 16:21:11 +00001896 * Truncate file to 'offset' bytes (needed only for file protocols)
1897 */
1898int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1899{
1900 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001901 int ret;
bellard83f64092006-08-01 16:21:11 +00001902 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001903 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001904 if (!drv->bdrv_truncate)
1905 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001906 if (bs->read_only)
1907 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001908 if (bdrv_in_use(bs))
1909 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001910 ret = drv->bdrv_truncate(bs, offset);
1911 if (ret == 0) {
1912 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001913 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001914 }
1915 return ret;
bellard83f64092006-08-01 16:21:11 +00001916}
1917
1918/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001919 * Length of a allocated file in bytes. Sparse files are counted by actual
1920 * allocated space. Return < 0 if error or unknown.
1921 */
1922int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1923{
1924 BlockDriver *drv = bs->drv;
1925 if (!drv) {
1926 return -ENOMEDIUM;
1927 }
1928 if (drv->bdrv_get_allocated_file_size) {
1929 return drv->bdrv_get_allocated_file_size(bs);
1930 }
1931 if (bs->file) {
1932 return bdrv_get_allocated_file_size(bs->file);
1933 }
1934 return -ENOTSUP;
1935}
1936
1937/**
bellard83f64092006-08-01 16:21:11 +00001938 * Length of a file in bytes. Return < 0 if error or unknown.
1939 */
1940int64_t bdrv_getlength(BlockDriverState *bs)
1941{
1942 BlockDriver *drv = bs->drv;
1943 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001944 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001945
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001946 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001947 if (drv->bdrv_getlength) {
1948 return drv->bdrv_getlength(bs);
1949 }
bellard83f64092006-08-01 16:21:11 +00001950 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001951 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001952}
1953
bellard19cb3732006-08-19 11:45:59 +00001954/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001955void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001956{
bellard19cb3732006-08-19 11:45:59 +00001957 int64_t length;
1958 length = bdrv_getlength(bs);
1959 if (length < 0)
1960 length = 0;
1961 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001962 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001963 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001964}
bellardcf989512004-02-16 21:56:36 +00001965
/*
 * One entry of the MSDOS partition table, laid out exactly as stored on
 * disk (the struct is packed).  guess_disk_lchs() overlays an array of four
 * of these on the boot sector starting at byte offset 0x1be and reads
 * nr_sects through le32_to_cpu().
 */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001978
1979/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1980static int guess_disk_lchs(BlockDriverState *bs,
1981 int *pcylinders, int *pheads, int *psectors)
1982{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001983 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001984 int ret, i, heads, sectors, cylinders;
1985 struct partition *p;
1986 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001987 uint64_t nb_sectors;
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001988 bool enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00001989
1990 bdrv_get_geometry(bs, &nb_sectors);
1991
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001992 /**
1993 * The function will be invoked during startup not only in sync I/O mode,
1994 * but also in async I/O mode. So the I/O throttling function has to
1995 * be disabled temporarily here, not permanently.
1996 */
1997 enabled = bs->io_limits_enabled;
1998 bs->io_limits_enabled = false;
aliguorif3d54fc2008-11-25 21:50:24 +00001999 ret = bdrv_read(bs, 0, buf, 1);
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002000 bs->io_limits_enabled = enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00002001 if (ret < 0)
2002 return -1;
2003 /* test msdos magic */
2004 if (buf[510] != 0x55 || buf[511] != 0xaa)
2005 return -1;
2006 for(i = 0; i < 4; i++) {
2007 p = ((struct partition *)(buf + 0x1be)) + i;
2008 nr_sects = le32_to_cpu(p->nr_sects);
2009 if (nr_sects && p->end_head) {
2010 /* We make the assumption that the partition terminates on
2011 a cylinder boundary */
2012 heads = p->end_head + 1;
2013 sectors = p->end_sector & 63;
2014 if (sectors == 0)
2015 continue;
2016 cylinders = nb_sectors / (heads * sectors);
2017 if (cylinders < 1 || cylinders > 16383)
2018 continue;
2019 *pheads = heads;
2020 *psectors = sectors;
2021 *pcylinders = cylinders;
2022#if 0
2023 printf("guessed geometry: LCHS=%d %d %d\n",
2024 cylinders, heads, sectors);
2025#endif
2026 return 0;
2027 }
2028 }
2029 return -1;
2030}
2031
2032void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2033{
2034 int translation, lba_detected = 0;
2035 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00002036 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00002037
2038 /* if a geometry hint is available, use it */
2039 bdrv_get_geometry(bs, &nb_sectors);
2040 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2041 translation = bdrv_get_translation_hint(bs);
2042 if (cylinders != 0) {
2043 *pcyls = cylinders;
2044 *pheads = heads;
2045 *psecs = secs;
2046 } else {
2047 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2048 if (heads > 16) {
2049 /* if heads > 16, it means that a BIOS LBA
2050 translation was active, so the default
2051 hardware geometry is OK */
2052 lba_detected = 1;
2053 goto default_geometry;
2054 } else {
2055 *pcyls = cylinders;
2056 *pheads = heads;
2057 *psecs = secs;
2058 /* disable any translation to be in sync with
2059 the logical geometry */
2060 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2061 bdrv_set_translation_hint(bs,
2062 BIOS_ATA_TRANSLATION_NONE);
2063 }
2064 }
2065 } else {
2066 default_geometry:
2067 /* if no geometry, use a standard physical disk geometry */
2068 cylinders = nb_sectors / (16 * 63);
2069
2070 if (cylinders > 16383)
2071 cylinders = 16383;
2072 else if (cylinders < 2)
2073 cylinders = 2;
2074 *pcyls = cylinders;
2075 *pheads = 16;
2076 *psecs = 63;
2077 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2078 if ((*pcyls * *pheads) <= 131072) {
2079 bdrv_set_translation_hint(bs,
2080 BIOS_ATA_TRANSLATION_LARGE);
2081 } else {
2082 bdrv_set_translation_hint(bs,
2083 BIOS_ATA_TRANSLATION_LBA);
2084 }
2085 }
2086 }
2087 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2088 }
2089}
2090
/*
 * Record a cylinders/heads/sectors geometry hint in 'bs'.
 * The values are stored as given; nothing is validated here.
 */
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}
2098
/* Record the translation hint for 'bs'; the value is stored as given. */
void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}
2103
/*
 * Copy the geometry hint stored in 'bs' into *pcyls, *pheads and *psecs.
 * All three output pointers are written unconditionally.
 */
void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}
2111
/* throttling disk io limits */
/*
 * Copy '*io_limits' into 'bs' and then recompute bs->io_limits_enabled from
 * bdrv_io_limits_enabled().  The copy happens first so that the check is
 * made after the new limits are in place.
 */
void bdrv_set_io_limits(BlockDriverState *bs,
                        BlockIOLimit *io_limits)
{
    bs->io_limits = *io_limits;
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}
2119
/* Recognize floppy formats */
/*
 * One known floppy disk format.  bdrv_get_floppy_geometry_hint() computes
 * the size of a format in sectors as
 * (max_head + 1) * max_track * last_sect.
 */
typedef struct FDFormat {
    FDriveType drive;   /* drive type this format belongs to */
    uint8_t last_sect;  /* sectors-per-track factor of the size formula */
    uint8_t max_track;  /* track-count factor of the size formula */
    uint8_t max_head;   /* head count minus one (the lookup adds 1) */
    FDriveRate rate;    /* rate reported for this format */
} FDFormat;
2128
/*
 * Table of known floppy formats.
 * Columns: drive, last_sect, max_track, max_head, rate.
 *
 * The lookup in bdrv_get_floppy_geometry_hint() scans from the top and stops
 * at the first entry whose size matches, so the order of entries matters.
 * The table is terminated by an entry whose drive is FDRIVE_DRV_NONE.
 */
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, FDRIVE_RATE_300K, },
    { FDRIVE_DRV_120,  9, 40, 0, FDRIVE_RATE_300K, },
    { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
    { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_120,  8, 40, 0, FDRIVE_RATE_250K, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, FDRIVE_RATE_250K, },
    /* end */
    /* NOTE(review): the -1 values land in uint8_t fields and therefore
     * convert to 255; the lookup stops on FDRIVE_DRV_NONE before reading
     * them. */
    { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
};
2175
2176void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2177 int *max_track, int *last_sect,
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002178 FDriveType drive_in, FDriveType *drive,
2179 FDriveRate *rate)
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002180{
2181 const FDFormat *parse;
2182 uint64_t nb_sectors, size;
2183 int i, first_match, match;
2184
2185 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2186 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2187 /* User defined disk */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002188 *rate = FDRIVE_RATE_500K;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002189 } else {
2190 bdrv_get_geometry(bs, &nb_sectors);
2191 match = -1;
2192 first_match = -1;
2193 for (i = 0; ; i++) {
2194 parse = &fd_formats[i];
2195 if (parse->drive == FDRIVE_DRV_NONE) {
2196 break;
2197 }
2198 if (drive_in == parse->drive ||
2199 drive_in == FDRIVE_DRV_NONE) {
2200 size = (parse->max_head + 1) * parse->max_track *
2201 parse->last_sect;
2202 if (nb_sectors == size) {
2203 match = i;
2204 break;
2205 }
2206 if (first_match == -1) {
2207 first_match = i;
2208 }
2209 }
2210 }
2211 if (match == -1) {
2212 if (first_match == -1) {
2213 match = 1;
2214 } else {
2215 match = first_match;
2216 }
2217 parse = &fd_formats[match];
2218 }
2219 *nb_heads = parse->max_head + 1;
2220 *max_track = parse->max_track;
2221 *last_sect = parse->last_sect;
2222 *drive = parse->drive;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002223 *rate = parse->rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002224 }
2225}
2226
/* Return the translation hint currently stored in 'bs'. */
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}
2231
/*
 * Store the actions to take on read errors and on write errors in 'bs'.
 * Both values are stored as given.
 */
void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}
2238
2239BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2240{
2241 return is_read ? bs->on_read_error : bs->on_write_error;
2242}
2243
/* Return the read_only field of 'bs'. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
2248
/* Return the sg field of 'bs'. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
2253
/* Return the enable_write_cache field of 'bs'. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
2258
bellardea2384d2004-08-01 21:59:26 +00002259int bdrv_is_encrypted(BlockDriverState *bs)
2260{
2261 if (bs->backing_hd && bs->backing_hd->encrypted)
2262 return 1;
2263 return bs->encrypted;
2264}
2265
aliguoric0f4ce72009-03-05 23:01:01 +00002266int bdrv_key_required(BlockDriverState *bs)
2267{
2268 BlockDriverState *backing_hd = bs->backing_hd;
2269
2270 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2271 return 1;
2272 return (bs->encrypted && !bs->valid_key);
2273}
2274
bellardea2384d2004-08-01 21:59:26 +00002275int bdrv_set_key(BlockDriverState *bs, const char *key)
2276{
2277 int ret;
2278 if (bs->backing_hd && bs->backing_hd->encrypted) {
2279 ret = bdrv_set_key(bs->backing_hd, key);
2280 if (ret < 0)
2281 return ret;
2282 if (!bs->encrypted)
2283 return 0;
2284 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002285 if (!bs->encrypted) {
2286 return -EINVAL;
2287 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2288 return -ENOMEDIUM;
2289 }
aliguoric0f4ce72009-03-05 23:01:01 +00002290 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002291 if (ret < 0) {
2292 bs->valid_key = 0;
2293 } else if (!bs->valid_key) {
2294 bs->valid_key = 1;
2295 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002296 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002297 }
aliguoric0f4ce72009-03-05 23:01:01 +00002298 return ret;
bellardea2384d2004-08-01 21:59:26 +00002299}
2300
2301void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2302{
bellard19cb3732006-08-19 11:45:59 +00002303 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002304 buf[0] = '\0';
2305 } else {
2306 pstrcpy(buf, buf_size, bs->drv->format_name);
2307 }
2308}
2309
/*
 * Call 'it(opaque, format_name)' once for every driver on the bdrv_drivers
 * list, in list order.
 */
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}
2319
bellardb3380822004-03-14 21:38:54 +00002320BlockDriverState *bdrv_find(const char *name)
2321{
2322 BlockDriverState *bs;
2323
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002324 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2325 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002326 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002327 }
bellardb3380822004-03-14 21:38:54 +00002328 }
2329 return NULL;
2330}
2331
Markus Armbruster2f399b02010-06-02 18:55:20 +02002332BlockDriverState *bdrv_next(BlockDriverState *bs)
2333{
2334 if (!bs) {
2335 return QTAILQ_FIRST(&bdrv_states);
2336 }
2337 return QTAILQ_NEXT(bs, list);
2338}
2339
/*
 * Call 'it(opaque, bs)' once for every entry on the bdrv_states list, in
 * list order.
 */
void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}
2348
/*
 * Return the device_name stored in 'bs'.  The returned pointer refers to
 * storage inside 'bs'; no copy is made.
 */
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
2353
/*
 * Call bdrv_flush() on every entry of the bdrv_states list.
 * The result of each individual flush is not examined.
 */
void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_flush(bs);
    }
}
2362
Kevin Wolff2feebb2010-04-14 17:30:35 +02002363int bdrv_has_zero_init(BlockDriverState *bs)
2364{
2365 assert(bs->drv);
2366
Kevin Wolf336c1c12010-07-28 11:26:29 +02002367 if (bs->drv->bdrv_has_zero_init) {
2368 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002369 }
2370
2371 return 1;
2372}
2373
/*
 * Argument/result bundle handed from bdrv_is_allocated() to the coroutine
 * entry point bdrv_is_allocated_co_entry().
 */
typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;   /* forwarded to bdrv_co_is_allocated() */
    int64_t sector_num;     /* forwarded to bdrv_co_is_allocated() */
    int nb_sectors;         /* forwarded to bdrv_co_is_allocated() */
    int *pnum;              /* forwarded to bdrv_co_is_allocated() */
    int ret;                /* result of bdrv_co_is_allocated() */
    bool done;              /* set to true once 'ret' has been stored */
} BdrvCoIsAllocatedData;
2382
thsf58c7b32008-06-05 21:53:49 +00002383/*
2384 * Returns true iff the specified sector is present in the disk image. Drivers
2385 * not implementing the functionality are assumed to not support backing files,
2386 * hence all their sectors are reported as allocated.
2387 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002388 * If 'sector_num' is beyond the end of the disk image the return value is 0
2389 * and 'pnum' is set to 0.
2390 *
thsf58c7b32008-06-05 21:53:49 +00002391 * 'pnum' is set to the number of sectors (including and immediately following
2392 * the specified sector) that are known to be in the same
2393 * allocated/unallocated state.
2394 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002395 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2396 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002397 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002398int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2399 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002400{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002401 int64_t n;
2402
2403 if (sector_num >= bs->total_sectors) {
2404 *pnum = 0;
2405 return 0;
2406 }
2407
2408 n = bs->total_sectors - sector_num;
2409 if (n < nb_sectors) {
2410 nb_sectors = n;
2411 }
2412
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002413 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002414 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002415 return 1;
2416 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002417
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002418 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2419}
2420
2421/* Coroutine wrapper for bdrv_is_allocated() */
2422static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2423{
2424 BdrvCoIsAllocatedData *data = opaque;
2425 BlockDriverState *bs = data->bs;
2426
2427 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2428 data->pnum);
2429 data->done = true;
2430}
2431
2432/*
2433 * Synchronous wrapper around bdrv_co_is_allocated().
2434 *
2435 * See bdrv_co_is_allocated() for details.
2436 */
2437int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2438 int *pnum)
2439{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002440 Coroutine *co;
2441 BdrvCoIsAllocatedData data = {
2442 .bs = bs,
2443 .sector_num = sector_num,
2444 .nb_sectors = nb_sectors,
2445 .pnum = pnum,
2446 .done = false,
2447 };
2448
2449 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2450 qemu_coroutine_enter(co, &data);
2451 while (!data.done) {
2452 qemu_aio_wait();
2453 }
2454 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002455}
2456
Luiz Capitulinob2023812011-09-21 17:16:47 -03002457BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002458{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002459 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002460 BlockDriverState *bs;
2461
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002462 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002463 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002464
Luiz Capitulinob2023812011-09-21 17:16:47 -03002465 info->value = g_malloc0(sizeof(*info->value));
2466 info->value->device = g_strdup(bs->device_name);
2467 info->value->type = g_strdup("unknown");
2468 info->value->locked = bdrv_dev_is_medium_locked(bs);
2469 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002470
Markus Armbrustere4def802011-09-06 18:58:53 +02002471 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002472 info->value->has_tray_open = true;
2473 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002474 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002475
2476 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002477 info->value->has_io_status = true;
2478 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002479 }
2480
bellard19cb3732006-08-19 11:45:59 +00002481 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002482 info->value->has_inserted = true;
2483 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2484 info->value->inserted->file = g_strdup(bs->filename);
2485 info->value->inserted->ro = bs->read_only;
2486 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2487 info->value->inserted->encrypted = bs->encrypted;
2488 if (bs->backing_file[0]) {
2489 info->value->inserted->has_backing_file = true;
2490 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002491 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002492
2493 if (bs->io_limits_enabled) {
2494 info->value->inserted->bps =
2495 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2496 info->value->inserted->bps_rd =
2497 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2498 info->value->inserted->bps_wr =
2499 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2500 info->value->inserted->iops =
2501 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2502 info->value->inserted->iops_rd =
2503 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2504 info->value->inserted->iops_wr =
2505 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2506 }
bellardb3380822004-03-14 21:38:54 +00002507 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002508
2509 /* XXX: waiting for the qapi to support GSList */
2510 if (!cur_item) {
2511 head = cur_item = info;
2512 } else {
2513 cur_item->next = info;
2514 cur_item = info;
2515 }
bellardb3380822004-03-14 21:38:54 +00002516 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002517
Luiz Capitulinob2023812011-09-21 17:16:47 -03002518 return head;
bellardb3380822004-03-14 21:38:54 +00002519}
thsa36e69d2007-12-02 05:18:19 +00002520
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002521/* Consider exposing this as a full fledged QMP command */
2522static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002523{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002524 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002525
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002526 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002527
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002528 if (bs->device_name[0]) {
2529 s->has_device = true;
2530 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002531 }
2532
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002533 s->stats = g_malloc0(sizeof(*s->stats));
2534 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2535 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2536 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2537 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2538 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2539 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2540 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2541 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2542 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2543
Kevin Wolf294cc352010-04-28 14:34:01 +02002544 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002545 s->has_parent = true;
2546 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002547 }
2548
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002549 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002550}
2551
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002552BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002553{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002554 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002555 BlockDriverState *bs;
2556
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002557 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002558 BlockStatsList *info = g_malloc0(sizeof(*info));
2559 info->value = qmp_query_blockstat(bs, NULL);
2560
2561 /* XXX: waiting for the qapi to support GSList */
2562 if (!cur_item) {
2563 head = cur_item = info;
2564 } else {
2565 cur_item->next = info;
2566 cur_item = info;
2567 }
thsa36e69d2007-12-02 05:18:19 +00002568 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002569
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002570 return head;
thsa36e69d2007-12-02 05:18:19 +00002571}
bellardea2384d2004-08-01 21:59:26 +00002572
aliguori045df332009-03-05 23:00:48 +00002573const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2574{
2575 if (bs->backing_hd && bs->backing_hd->encrypted)
2576 return bs->backing_file;
2577 else if (bs->encrypted)
2578 return bs->filename;
2579 else
2580 return NULL;
2581}
2582
ths5fafdf22007-09-16 21:08:06 +00002583void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002584 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002585{
Kevin Wolf3574c602011-10-26 11:02:11 +02002586 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002587}
2588
ths5fafdf22007-09-16 21:08:06 +00002589int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002590 const uint8_t *buf, int nb_sectors)
2591{
2592 BlockDriver *drv = bs->drv;
2593 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002594 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002595 if (!drv->bdrv_write_compressed)
2596 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002597 if (bdrv_check_request(bs, sector_num, nb_sectors))
2598 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002599
Jan Kiszkac6d22832009-11-30 18:21:20 +01002600 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002601 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2602 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002603
bellardfaea38e2006-08-05 21:31:00 +00002604 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2605}
ths3b46e622007-09-17 08:09:54 +00002606
bellardfaea38e2006-08-05 21:31:00 +00002607int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2608{
2609 BlockDriver *drv = bs->drv;
2610 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002611 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002612 if (!drv->bdrv_get_info)
2613 return -ENOTSUP;
2614 memset(bdi, 0, sizeof(*bdi));
2615 return drv->bdrv_get_info(bs, bdi);
2616}
2617
Christoph Hellwig45566e92009-07-10 23:11:57 +02002618int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2619 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002620{
2621 BlockDriver *drv = bs->drv;
2622 if (!drv)
2623 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002624 if (drv->bdrv_save_vmstate)
2625 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2626 if (bs->file)
2627 return bdrv_save_vmstate(bs->file, buf, pos, size);
2628 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002629}
2630
Christoph Hellwig45566e92009-07-10 23:11:57 +02002631int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2632 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002633{
2634 BlockDriver *drv = bs->drv;
2635 if (!drv)
2636 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002637 if (drv->bdrv_load_vmstate)
2638 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2639 if (bs->file)
2640 return bdrv_load_vmstate(bs->file, buf, pos, size);
2641 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002642}
2643
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002644void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2645{
2646 BlockDriver *drv = bs->drv;
2647
2648 if (!drv || !drv->bdrv_debug_event) {
2649 return;
2650 }
2651
2652 return drv->bdrv_debug_event(bs, event);
2653
2654}
2655
bellardfaea38e2006-08-05 21:31:00 +00002656/**************************************************************/
2657/* handling of snapshots */
2658
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002659int bdrv_can_snapshot(BlockDriverState *bs)
2660{
2661 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002662 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002663 return 0;
2664 }
2665
2666 if (!drv->bdrv_snapshot_create) {
2667 if (bs->file != NULL) {
2668 return bdrv_can_snapshot(bs->file);
2669 }
2670 return 0;
2671 }
2672
2673 return 1;
2674}
2675
Blue Swirl199630b2010-07-25 20:49:34 +00002676int bdrv_is_snapshot(BlockDriverState *bs)
2677{
2678 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2679}
2680
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002681BlockDriverState *bdrv_snapshots(void)
2682{
2683 BlockDriverState *bs;
2684
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002685 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002686 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002687 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002688
2689 bs = NULL;
2690 while ((bs = bdrv_next(bs))) {
2691 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002692 bs_snapshots = bs;
2693 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002694 }
2695 }
2696 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002697}
2698
ths5fafdf22007-09-16 21:08:06 +00002699int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002700 QEMUSnapshotInfo *sn_info)
2701{
2702 BlockDriver *drv = bs->drv;
2703 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002704 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002705 if (drv->bdrv_snapshot_create)
2706 return drv->bdrv_snapshot_create(bs, sn_info);
2707 if (bs->file)
2708 return bdrv_snapshot_create(bs->file, sn_info);
2709 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002710}
2711
ths5fafdf22007-09-16 21:08:06 +00002712int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002713 const char *snapshot_id)
2714{
2715 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002716 int ret, open_ret;
2717
bellardfaea38e2006-08-05 21:31:00 +00002718 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002719 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002720 if (drv->bdrv_snapshot_goto)
2721 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2722
2723 if (bs->file) {
2724 drv->bdrv_close(bs);
2725 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2726 open_ret = drv->bdrv_open(bs, bs->open_flags);
2727 if (open_ret < 0) {
2728 bdrv_delete(bs->file);
2729 bs->drv = NULL;
2730 return open_ret;
2731 }
2732 return ret;
2733 }
2734
2735 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002736}
2737
2738int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2739{
2740 BlockDriver *drv = bs->drv;
2741 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002742 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002743 if (drv->bdrv_snapshot_delete)
2744 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2745 if (bs->file)
2746 return bdrv_snapshot_delete(bs->file, snapshot_id);
2747 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002748}
2749
ths5fafdf22007-09-16 21:08:06 +00002750int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002751 QEMUSnapshotInfo **psn_info)
2752{
2753 BlockDriver *drv = bs->drv;
2754 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002755 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002756 if (drv->bdrv_snapshot_list)
2757 return drv->bdrv_snapshot_list(bs, psn_info);
2758 if (bs->file)
2759 return bdrv_snapshot_list(bs->file, psn_info);
2760 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002761}
2762
edison51ef6722010-09-21 19:58:41 -07002763int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2764 const char *snapshot_name)
2765{
2766 BlockDriver *drv = bs->drv;
2767 if (!drv) {
2768 return -ENOMEDIUM;
2769 }
2770 if (!bs->read_only) {
2771 return -EINVAL;
2772 }
2773 if (drv->bdrv_snapshot_load_tmp) {
2774 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2775 }
2776 return -ENOTSUP;
2777}
2778
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00002779BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2780 const char *backing_file)
2781{
2782 if (!bs->drv) {
2783 return NULL;
2784 }
2785
2786 if (bs->backing_hd) {
2787 if (strcmp(bs->backing_file, backing_file) == 0) {
2788 return bs->backing_hd;
2789 } else {
2790 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2791 }
2792 }
2793
2794 return NULL;
2795}
2796
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (of 'buf_size' bytes) in a compact form.
 *
 * Values up to 999 (including negative values) are printed as plain
 * integers. Larger values are scaled by powers of 1024 and suffixed with K,
 * M, G or T: one decimal place is shown while the scaled value is below 10,
 * otherwise the value is rounded to the nearest integer. Values too large
 * for the other suffixes are expressed in T.
 *
 * Returns 'buf'.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = { 'K', 'M', 'G', 'T' };
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    base = 1024;
    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            break;
        } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /*
             * Round to nearest using quotient and remainder; adding base/2
             * to 'size' first would overflow for values near INT64_MAX.
             */
            int64_t rounded = size / base + (size % base >= base / 2);

            snprintf(buf, buf_size, "%" PRId64 "%c",
                     rounded,
                     suffixes[i]);
            break;
        }
        base = base * 1024;
    }

    return buf;
}
2826
2827char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2828{
2829 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002830#ifdef _WIN32
2831 struct tm *ptm;
2832#else
bellardfaea38e2006-08-05 21:31:00 +00002833 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002834#endif
bellardfaea38e2006-08-05 21:31:00 +00002835 time_t ti;
2836 int64_t secs;
2837
2838 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002839 snprintf(buf, buf_size,
2840 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002841 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2842 } else {
2843 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002844#ifdef _WIN32
2845 ptm = localtime(&ti);
2846 strftime(date_buf, sizeof(date_buf),
2847 "%Y-%m-%d %H:%M:%S", ptm);
2848#else
bellardfaea38e2006-08-05 21:31:00 +00002849 localtime_r(&ti, &tm);
2850 strftime(date_buf, sizeof(date_buf),
2851 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002852#endif
bellardfaea38e2006-08-05 21:31:00 +00002853 secs = sn->vm_clock_nsec / 1000000000;
2854 snprintf(clock_buf, sizeof(clock_buf),
2855 "%02d:%02d:%02d.%03d",
2856 (int)(secs / 3600),
2857 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002858 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002859 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2860 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002861 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002862 sn->id_str, sn->name,
2863 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2864 date_buf,
2865 clock_buf);
2866 }
2867 return buf;
2868}
2869
bellard83f64092006-08-01 16:21:11 +00002870/**************************************************************/
2871/* async I/Os */
2872
aliguori3b69e4b2009-01-22 16:59:24 +00002873BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002874 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002875 BlockDriverCompletionFunc *cb, void *opaque)
2876{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002877 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2878
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002879 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002880 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002881}
2882
aliguorif141eaf2009-04-07 18:43:24 +00002883BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2884 QEMUIOVector *qiov, int nb_sectors,
2885 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002886{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002887 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2888
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002889 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002890 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002891}
2892
Kevin Wolf40b4f532009-09-09 17:53:37 +02002893
2894typedef struct MultiwriteCB {
2895 int error;
2896 int num_requests;
2897 int num_callbacks;
2898 struct {
2899 BlockDriverCompletionFunc *cb;
2900 void *opaque;
2901 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002902 } callbacks[];
2903} MultiwriteCB;
2904
2905static void multiwrite_user_cb(MultiwriteCB *mcb)
2906{
2907 int i;
2908
2909 for (i = 0; i < mcb->num_callbacks; i++) {
2910 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002911 if (mcb->callbacks[i].free_qiov) {
2912 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2913 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002914 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002915 }
2916}
2917
2918static void multiwrite_cb(void *opaque, int ret)
2919{
2920 MultiwriteCB *mcb = opaque;
2921
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002922 trace_multiwrite_cb(mcb, ret);
2923
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002924 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002925 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002926 }
2927
2928 mcb->num_requests--;
2929 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002930 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002931 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002932 }
2933}
2934
2935static int multiwrite_req_compare(const void *a, const void *b)
2936{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002937 const BlockRequest *req1 = a, *req2 = b;
2938
2939 /*
2940 * Note that we can't simply subtract req2->sector from req1->sector
2941 * here as that could overflow the return value.
2942 */
2943 if (req1->sector > req2->sector) {
2944 return 1;
2945 } else if (req1->sector < req2->sector) {
2946 return -1;
2947 } else {
2948 return 0;
2949 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002950}
2951
2952/*
2953 * Takes a bunch of requests and tries to merge them. Returns the number of
2954 * requests that remain after merging.
2955 */
2956static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2957 int num_reqs, MultiwriteCB *mcb)
2958{
2959 int i, outidx;
2960
2961 // Sort requests by start sector
2962 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2963
2964 // Check if adjacent requests touch the same clusters. If so, combine them,
2965 // filling up gaps with zero sectors.
2966 outidx = 0;
2967 for (i = 1; i < num_reqs; i++) {
2968 int merge = 0;
2969 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2970
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01002971 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02002972 if (reqs[i].sector <= oldreq_last) {
2973 merge = 1;
2974 }
2975
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002976 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2977 merge = 0;
2978 }
2979
Kevin Wolf40b4f532009-09-09 17:53:37 +02002980 if (merge) {
2981 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002982 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002983 qemu_iovec_init(qiov,
2984 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2985
2986 // Add the first request to the merged one. If the requests are
2987 // overlapping, drop the last sectors of the first request.
2988 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2989 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2990
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01002991 // We should need to add any zeros between the two requests
2992 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002993
2994 // Add the second request
2995 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2996
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002997 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002998 reqs[outidx].qiov = qiov;
2999
3000 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3001 } else {
3002 outidx++;
3003 reqs[outidx].sector = reqs[i].sector;
3004 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3005 reqs[outidx].qiov = reqs[i].qiov;
3006 }
3007 }
3008
3009 return outidx + 1;
3010}
3011
3012/*
3013 * Submit multiple AIO write requests at once.
3014 *
3015 * On success, the function returns 0 and all requests in the reqs array have
3016 * been submitted. In error case this function returns -1, and any of the
3017 * requests may or may not be submitted yet. In particular, this means that the
3018 * callback will be called for some of the requests, for others it won't. The
3019 * caller must check the error field of the BlockRequest to wait for the right
3020 * callbacks (if error != 0, no callback will be called).
3021 *
3022 * The implementation may modify the contents of the reqs array, e.g. to merge
3023 * requests. However, the fields opaque and error are left unmodified as they
3024 * are used to signal failure for a single request to the caller.
3025 */
3026int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3027{
Kevin Wolf40b4f532009-09-09 17:53:37 +02003028 MultiwriteCB *mcb;
3029 int i;
3030
Ryan Harper301db7c2011-03-07 10:01:04 -06003031 /* don't submit writes if we don't have a medium */
3032 if (bs->drv == NULL) {
3033 for (i = 0; i < num_reqs; i++) {
3034 reqs[i].error = -ENOMEDIUM;
3035 }
3036 return -1;
3037 }
3038
Kevin Wolf40b4f532009-09-09 17:53:37 +02003039 if (num_reqs == 0) {
3040 return 0;
3041 }
3042
3043 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05003044 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003045 mcb->num_requests = 0;
3046 mcb->num_callbacks = num_reqs;
3047
3048 for (i = 0; i < num_reqs; i++) {
3049 mcb->callbacks[i].cb = reqs[i].cb;
3050 mcb->callbacks[i].opaque = reqs[i].opaque;
3051 }
3052
3053 // Check for mergable requests
3054 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3055
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003056 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3057
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01003058 /* Run the aio requests. */
3059 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003060 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01003061 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02003062 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003063 }
3064
3065 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003066}
3067
bellard83f64092006-08-01 16:21:11 +00003068void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00003069{
aliguori6bbff9a2009-03-20 18:25:59 +00003070 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00003071}
3072
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08003073/* block I/O throttling */
3074static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3075 bool is_write, double elapsed_time, uint64_t *wait)
3076{
3077 uint64_t bps_limit = 0;
3078 double bytes_limit, bytes_base, bytes_res;
3079 double slice_time, wait_time;
3080
3081 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3082 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3083 } else if (bs->io_limits.bps[is_write]) {
3084 bps_limit = bs->io_limits.bps[is_write];
3085 } else {
3086 if (wait) {
3087 *wait = 0;
3088 }
3089
3090 return false;
3091 }
3092
3093 slice_time = bs->slice_end - bs->slice_start;
3094 slice_time /= (NANOSECONDS_PER_SECOND);
3095 bytes_limit = bps_limit * slice_time;
3096 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3097 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3098 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3099 }
3100
3101 /* bytes_base: the bytes of data which have been read/written; and
3102 * it is obtained from the history statistic info.
3103 * bytes_res: the remaining bytes of data which need to be read/written.
3104 * (bytes_base + bytes_res) / bps_limit: used to calcuate
3105 * the total time for completing reading/writting all data.
3106 */
3107 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3108
3109 if (bytes_base + bytes_res <= bytes_limit) {
3110 if (wait) {
3111 *wait = 0;
3112 }
3113
3114 return false;
3115 }
3116
3117 /* Calc approx time to dispatch */
3118 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3119
3120 /* When the I/O rate at runtime exceeds the limits,
3121 * bs->slice_end need to be extended in order that the current statistic
3122 * info can be kept until the timer fire, so it is increased and tuned
3123 * based on the result of experiment.
3124 */
3125 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3126 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3127 if (wait) {
3128 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3129 }
3130
3131 return true;
3132}
3133
3134static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3135 double elapsed_time, uint64_t *wait)
3136{
3137 uint64_t iops_limit = 0;
3138 double ios_limit, ios_base;
3139 double slice_time, wait_time;
3140
3141 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3142 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3143 } else if (bs->io_limits.iops[is_write]) {
3144 iops_limit = bs->io_limits.iops[is_write];
3145 } else {
3146 if (wait) {
3147 *wait = 0;
3148 }
3149
3150 return false;
3151 }
3152
3153 slice_time = bs->slice_end - bs->slice_start;
3154 slice_time /= (NANOSECONDS_PER_SECOND);
3155 ios_limit = iops_limit * slice_time;
3156 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3157 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3158 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3159 }
3160
3161 if (ios_base + 1 <= ios_limit) {
3162 if (wait) {
3163 *wait = 0;
3164 }
3165
3166 return false;
3167 }
3168
3169 /* Calc approx time to dispatch */
3170 wait_time = (ios_base + 1) / iops_limit;
3171 if (wait_time > elapsed_time) {
3172 wait_time = wait_time - elapsed_time;
3173 } else {
3174 wait_time = 0;
3175 }
3176
3177 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3178 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3179 if (wait) {
3180 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3181 }
3182
3183 return true;
3184}
3185
3186static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3187 bool is_write, int64_t *wait)
3188{
3189 int64_t now, max_wait;
3190 uint64_t bps_wait = 0, iops_wait = 0;
3191 double elapsed_time;
3192 int bps_ret, iops_ret;
3193
3194 now = qemu_get_clock_ns(vm_clock);
3195 if ((bs->slice_start < now)
3196 && (bs->slice_end > now)) {
3197 bs->slice_end = now + bs->slice_time;
3198 } else {
3199 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3200 bs->slice_start = now;
3201 bs->slice_end = now + bs->slice_time;
3202
3203 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3204 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3205
3206 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3207 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3208 }
3209
3210 elapsed_time = now - bs->slice_start;
3211 elapsed_time /= (NANOSECONDS_PER_SECOND);
3212
3213 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3214 is_write, elapsed_time, &bps_wait);
3215 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3216 elapsed_time, &iops_wait);
3217 if (bps_ret || iops_ret) {
3218 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3219 if (wait) {
3220 *wait = max_wait;
3221 }
3222
3223 now = qemu_get_clock_ns(vm_clock);
3224 if (bs->slice_end < now + max_wait) {
3225 bs->slice_end = now + max_wait;
3226 }
3227
3228 return true;
3229 }
3230
3231 if (wait) {
3232 *wait = 0;
3233 }
3234
3235 return false;
3236}
pbrookce1a14d2006-08-07 02:38:06 +00003237
bellard83f64092006-08-01 16:21:11 +00003238/**************************************************************/
3239/* async block device emulation */
3240
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003241typedef struct BlockDriverAIOCBSync {
3242 BlockDriverAIOCB common;
3243 QEMUBH *bh;
3244 int ret;
3245 /* vector translation state */
3246 QEMUIOVector *qiov;
3247 uint8_t *bounce;
3248 int is_write;
3249} BlockDriverAIOCBSync;
3250
3251static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3252{
Kevin Wolfb666d232010-05-05 11:44:39 +02003253 BlockDriverAIOCBSync *acb =
3254 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003255 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003256 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003257 qemu_aio_release(acb);
3258}
3259
3260static AIOPool bdrv_em_aio_pool = {
3261 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3262 .cancel = bdrv_aio_cancel_em,
3263};
3264
bellard83f64092006-08-01 16:21:11 +00003265static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003266{
pbrookce1a14d2006-08-07 02:38:06 +00003267 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003268
aliguorif141eaf2009-04-07 18:43:24 +00003269 if (!acb->is_write)
3270 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003271 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003272 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003273 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003274 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003275 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003276}
bellardbeac80c2006-06-26 20:08:57 +00003277
aliguorif141eaf2009-04-07 18:43:24 +00003278static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3279 int64_t sector_num,
3280 QEMUIOVector *qiov,
3281 int nb_sectors,
3282 BlockDriverCompletionFunc *cb,
3283 void *opaque,
3284 int is_write)
3285
bellardea2384d2004-08-01 21:59:26 +00003286{
pbrookce1a14d2006-08-07 02:38:06 +00003287 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003288
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003289 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003290 acb->is_write = is_write;
3291 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003292 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003293 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003294
3295 if (is_write) {
3296 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003297 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003298 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003299 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003300 }
3301
pbrookce1a14d2006-08-07 02:38:06 +00003302 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003303
pbrookce1a14d2006-08-07 02:38:06 +00003304 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003305}
3306
aliguorif141eaf2009-04-07 18:43:24 +00003307static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3308 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003309 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003310{
aliguorif141eaf2009-04-07 18:43:24 +00003311 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003312}
3313
aliguorif141eaf2009-04-07 18:43:24 +00003314static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3315 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3316 BlockDriverCompletionFunc *cb, void *opaque)
3317{
3318 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3319}
3320
Kevin Wolf68485422011-06-30 10:05:46 +02003321
3322typedef struct BlockDriverAIOCBCoroutine {
3323 BlockDriverAIOCB common;
3324 BlockRequest req;
3325 bool is_write;
3326 QEMUBH* bh;
3327} BlockDriverAIOCBCoroutine;
3328
3329static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3330{
3331 qemu_aio_flush();
3332}
3333
3334static AIOPool bdrv_em_co_aio_pool = {
3335 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3336 .cancel = bdrv_aio_co_cancel_em,
3337};
3338
Paolo Bonzini35246a62011-10-14 10:41:29 +02003339static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003340{
3341 BlockDriverAIOCBCoroutine *acb = opaque;
3342
3343 acb->common.cb(acb->common.opaque, acb->req.error);
3344 qemu_bh_delete(acb->bh);
3345 qemu_aio_release(acb);
3346}
3347
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003348/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3349static void coroutine_fn bdrv_co_do_rw(void *opaque)
3350{
3351 BlockDriverAIOCBCoroutine *acb = opaque;
3352 BlockDriverState *bs = acb->common.bs;
3353
3354 if (!acb->is_write) {
3355 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003356 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003357 } else {
3358 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003359 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003360 }
3361
Paolo Bonzini35246a62011-10-14 10:41:29 +02003362 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003363 qemu_bh_schedule(acb->bh);
3364}
3365
Kevin Wolf68485422011-06-30 10:05:46 +02003366static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3367 int64_t sector_num,
3368 QEMUIOVector *qiov,
3369 int nb_sectors,
3370 BlockDriverCompletionFunc *cb,
3371 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003372 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003373{
3374 Coroutine *co;
3375 BlockDriverAIOCBCoroutine *acb;
3376
3377 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3378 acb->req.sector = sector_num;
3379 acb->req.nb_sectors = nb_sectors;
3380 acb->req.qiov = qiov;
3381 acb->is_write = is_write;
3382
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003383 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003384 qemu_coroutine_enter(co, acb);
3385
3386 return &acb->common;
3387}
3388
Paolo Bonzini07f07612011-10-17 12:32:12 +02003389static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003390{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003391 BlockDriverAIOCBCoroutine *acb = opaque;
3392 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003393
Paolo Bonzini07f07612011-10-17 12:32:12 +02003394 acb->req.error = bdrv_co_flush(bs);
3395 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003396 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003397}
3398
Paolo Bonzini07f07612011-10-17 12:32:12 +02003399BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003400 BlockDriverCompletionFunc *cb, void *opaque)
3401{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003402 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003403
Paolo Bonzini07f07612011-10-17 12:32:12 +02003404 Coroutine *co;
3405 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003406
Paolo Bonzini07f07612011-10-17 12:32:12 +02003407 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3408 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3409 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003410
Alexander Graf016f5cf2010-05-26 17:51:49 +02003411 return &acb->common;
3412}
3413
Paolo Bonzini4265d622011-10-17 12:32:14 +02003414static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3415{
3416 BlockDriverAIOCBCoroutine *acb = opaque;
3417 BlockDriverState *bs = acb->common.bs;
3418
3419 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3420 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3421 qemu_bh_schedule(acb->bh);
3422}
3423
3424BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3425 int64_t sector_num, int nb_sectors,
3426 BlockDriverCompletionFunc *cb, void *opaque)
3427{
3428 Coroutine *co;
3429 BlockDriverAIOCBCoroutine *acb;
3430
3431 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3432
3433 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3434 acb->req.sector = sector_num;
3435 acb->req.nb_sectors = nb_sectors;
3436 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3437 qemu_coroutine_enter(co, acb);
3438
3439 return &acb->common;
3440}
3441
bellardea2384d2004-08-01 21:59:26 +00003442void bdrv_init(void)
3443{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003444 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003445}
pbrookce1a14d2006-08-07 02:38:06 +00003446
Markus Armbrustereb852012009-10-27 18:41:44 +01003447void bdrv_init_with_whitelist(void)
3448{
3449 use_bdrv_whitelist = 1;
3450 bdrv_init();
3451}
3452
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003453void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3454 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003455{
pbrookce1a14d2006-08-07 02:38:06 +00003456 BlockDriverAIOCB *acb;
3457
aliguori6bbff9a2009-03-20 18:25:59 +00003458 if (pool->free_aiocb) {
3459 acb = pool->free_aiocb;
3460 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003461 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003462 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003463 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003464 }
3465 acb->bs = bs;
3466 acb->cb = cb;
3467 acb->opaque = opaque;
3468 return acb;
3469}
3470
3471void qemu_aio_release(void *p)
3472{
aliguori6bbff9a2009-03-20 18:25:59 +00003473 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3474 AIOPool *pool = acb->pool;
3475 acb->next = pool->free_aiocb;
3476 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003477}
bellard19cb3732006-08-19 11:45:59 +00003478
/**************************************************************/
/* Coroutine block device emulation */
3481
3482typedef struct CoroutineIOCompletion {
3483 Coroutine *coroutine;
3484 int ret;
3485} CoroutineIOCompletion;
3486
3487static void bdrv_co_io_em_complete(void *opaque, int ret)
3488{
3489 CoroutineIOCompletion *co = opaque;
3490
3491 co->ret = ret;
3492 qemu_coroutine_enter(co->coroutine, NULL);
3493}
3494
3495static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3496 int nb_sectors, QEMUIOVector *iov,
3497 bool is_write)
3498{
3499 CoroutineIOCompletion co = {
3500 .coroutine = qemu_coroutine_self(),
3501 };
3502 BlockDriverAIOCB *acb;
3503
3504 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003505 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3506 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003507 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003508 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3509 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003510 }
3511
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003512 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003513 if (!acb) {
3514 return -EIO;
3515 }
3516 qemu_coroutine_yield();
3517
3518 return co.ret;
3519}
3520
3521static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3522 int64_t sector_num, int nb_sectors,
3523 QEMUIOVector *iov)
3524{
3525 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3526}
3527
3528static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3529 int64_t sector_num, int nb_sectors,
3530 QEMUIOVector *iov)
3531{
3532 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3533}
3534
Paolo Bonzini07f07612011-10-17 12:32:12 +02003535static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003536{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003537 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003538
Paolo Bonzini07f07612011-10-17 12:32:12 +02003539 rwco->ret = bdrv_co_flush(rwco->bs);
3540}
3541
3542int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3543{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003544 int ret;
3545
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003546 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003547 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003548 }
3549
Kevin Wolfca716362011-11-10 18:13:59 +01003550 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003551 if (bs->drv->bdrv_co_flush_to_os) {
3552 ret = bs->drv->bdrv_co_flush_to_os(bs);
3553 if (ret < 0) {
3554 return ret;
3555 }
3556 }
3557
Kevin Wolfca716362011-11-10 18:13:59 +01003558 /* But don't actually force it to the disk with cache=unsafe */
3559 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3560 return 0;
3561 }
3562
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003563 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003564 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003565 } else if (bs->drv->bdrv_aio_flush) {
3566 BlockDriverAIOCB *acb;
3567 CoroutineIOCompletion co = {
3568 .coroutine = qemu_coroutine_self(),
3569 };
3570
3571 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3572 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003573 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003574 } else {
3575 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003576 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003577 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003578 } else {
3579 /*
3580 * Some block drivers always operate in either writethrough or unsafe
3581 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3582 * know how the server works (because the behaviour is hardcoded or
3583 * depends on server-side configuration), so we can't ensure that
3584 * everything is safe on disk. Returning an error doesn't work because
3585 * that would break guests even if the server operates in writethrough
3586 * mode.
3587 *
3588 * Let's hope the user knows what he's doing.
3589 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003590 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003591 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003592 if (ret < 0) {
3593 return ret;
3594 }
3595
3596 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3597 * in the case of cache=unsafe, so there are no useless flushes.
3598 */
3599 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003600}
3601
Anthony Liguori0f154232011-11-14 15:09:45 -06003602void bdrv_invalidate_cache(BlockDriverState *bs)
3603{
3604 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3605 bs->drv->bdrv_invalidate_cache(bs);
3606 }
3607}
3608
3609void bdrv_invalidate_cache_all(void)
3610{
3611 BlockDriverState *bs;
3612
3613 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3614 bdrv_invalidate_cache(bs);
3615 }
3616}
3617
Paolo Bonzini07f07612011-10-17 12:32:12 +02003618int bdrv_flush(BlockDriverState *bs)
3619{
3620 Coroutine *co;
3621 RwCo rwco = {
3622 .bs = bs,
3623 .ret = NOT_DONE,
3624 };
3625
3626 if (qemu_in_coroutine()) {
3627 /* Fast-path if already in coroutine context */
3628 bdrv_flush_co_entry(&rwco);
3629 } else {
3630 co = qemu_coroutine_create(bdrv_flush_co_entry);
3631 qemu_coroutine_enter(co, &rwco);
3632 while (rwco.ret == NOT_DONE) {
3633 qemu_aio_wait();
3634 }
3635 }
3636
3637 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003638}
3639
Paolo Bonzini4265d622011-10-17 12:32:14 +02003640static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3641{
3642 RwCo *rwco = opaque;
3643
3644 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3645}
3646
3647int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3648 int nb_sectors)
3649{
3650 if (!bs->drv) {
3651 return -ENOMEDIUM;
3652 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3653 return -EIO;
3654 } else if (bs->read_only) {
3655 return -EROFS;
3656 } else if (bs->drv->bdrv_co_discard) {
3657 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3658 } else if (bs->drv->bdrv_aio_discard) {
3659 BlockDriverAIOCB *acb;
3660 CoroutineIOCompletion co = {
3661 .coroutine = qemu_coroutine_self(),
3662 };
3663
3664 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3665 bdrv_co_io_em_complete, &co);
3666 if (acb == NULL) {
3667 return -EIO;
3668 } else {
3669 qemu_coroutine_yield();
3670 return co.ret;
3671 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003672 } else {
3673 return 0;
3674 }
3675}
3676
3677int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3678{
3679 Coroutine *co;
3680 RwCo rwco = {
3681 .bs = bs,
3682 .sector_num = sector_num,
3683 .nb_sectors = nb_sectors,
3684 .ret = NOT_DONE,
3685 };
3686
3687 if (qemu_in_coroutine()) {
3688 /* Fast-path if already in coroutine context */
3689 bdrv_discard_co_entry(&rwco);
3690 } else {
3691 co = qemu_coroutine_create(bdrv_discard_co_entry);
3692 qemu_coroutine_enter(co, &rwco);
3693 while (rwco.ret == NOT_DONE) {
3694 qemu_aio_wait();
3695 }
3696 }
3697
3698 return rwco.ret;
3699}
3700
/**************************************************************/
/* removable device support */
3703
3704/**
3705 * Return TRUE if the media is present
3706 */
3707int bdrv_is_inserted(BlockDriverState *bs)
3708{
3709 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003710
bellard19cb3732006-08-19 11:45:59 +00003711 if (!drv)
3712 return 0;
3713 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003714 return 1;
3715 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003716}
3717
3718/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003719 * Return whether the media changed since the last call to this
3720 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003721 */
3722int bdrv_media_changed(BlockDriverState *bs)
3723{
3724 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003725
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003726 if (drv && drv->bdrv_media_changed) {
3727 return drv->bdrv_media_changed(bs);
3728 }
3729 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003730}
3731
3732/**
3733 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3734 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02003735void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003736{
3737 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003738
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003739 if (drv && drv->bdrv_eject) {
3740 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003741 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02003742
3743 if (bs->device_name[0] != '\0') {
3744 bdrv_emit_qmp_eject_event(bs, eject_flag);
3745 }
bellard19cb3732006-08-19 11:45:59 +00003746}
3747
bellard19cb3732006-08-19 11:45:59 +00003748/**
3749 * Lock or unlock the media (if it is locked, the user won't be able
3750 * to eject it manually).
3751 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003752void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003753{
3754 BlockDriver *drv = bs->drv;
3755
Markus Armbruster025e8492011-09-06 18:58:47 +02003756 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003757
Markus Armbruster025e8492011-09-06 18:58:47 +02003758 if (drv && drv->bdrv_lock_medium) {
3759 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003760 }
3761}
ths985a03b2007-12-24 16:10:43 +00003762
/* needed for generic scsi interface */
3764
3765int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3766{
3767 BlockDriver *drv = bs->drv;
3768
3769 if (drv && drv->bdrv_ioctl)
3770 return drv->bdrv_ioctl(bs, req, buf);
3771 return -ENOTSUP;
3772}
aliguori7d780662009-03-12 19:57:08 +00003773
aliguori221f7152009-03-28 17:28:41 +00003774BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3775 unsigned long int req, void *buf,
3776 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003777{
aliguori221f7152009-03-28 17:28:41 +00003778 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003779
aliguori221f7152009-03-28 17:28:41 +00003780 if (drv && drv->bdrv_aio_ioctl)
3781 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3782 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003783}
aliguorie268ca52009-04-22 20:20:00 +00003784
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003785void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3786{
3787 bs->buffer_alignment = align;
3788}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003789
aliguorie268ca52009-04-22 20:20:00 +00003790void *qemu_blockalign(BlockDriverState *bs, size_t size)
3791{
3792 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3793}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003794
3795void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3796{
3797 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003798
Liran Schouraaa0eb72010-01-26 10:31:48 +02003799 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003800 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003801 if (!bs->dirty_bitmap) {
3802 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3803 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3804 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003805
Anthony Liguori7267c092011-08-20 22:09:37 -05003806 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003807 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003808 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003809 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003810 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003811 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003812 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003813 }
3814}
3815
3816int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3817{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003818 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003819
Jan Kiszkac6d22832009-11-30 18:21:20 +01003820 if (bs->dirty_bitmap &&
3821 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003822 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3823 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003824 } else {
3825 return 0;
3826 }
3827}
3828
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003829void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3830 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003831{
3832 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3833}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003834
3835int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3836{
3837 return bs->dirty_count;
3838}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003839
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003840void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3841{
3842 assert(bs->in_use != in_use);
3843 bs->in_use = in_use;
3844}
3845
3846int bdrv_in_use(BlockDriverState *bs)
3847{
3848 return bs->in_use;
3849}
3850
Luiz Capitulino28a72822011-09-26 17:43:50 -03003851void bdrv_iostatus_enable(BlockDriverState *bs)
3852{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003853 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003854 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003855}
3856
3857/* The I/O status is only enabled if the drive explicitly
3858 * enables it _and_ the VM is configured to stop on errors */
3859bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3860{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003861 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003862 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3863 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3864 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3865}
3866
3867void bdrv_iostatus_disable(BlockDriverState *bs)
3868{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003869 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003870}
3871
3872void bdrv_iostatus_reset(BlockDriverState *bs)
3873{
3874 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003875 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003876 }
3877}
3878
3879/* XXX: Today this is set by device models because it makes the implementation
3880 quite simple. However, the block layer knows about the error, so it's
3881 possible to implement this without device models being involved */
3882void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3883{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003884 if (bdrv_iostatus_is_enabled(bs) &&
3885 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003886 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003887 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3888 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003889 }
3890}
3891
Christoph Hellwiga597e792011-08-25 08:26:01 +02003892void
3893bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3894 enum BlockAcctType type)
3895{
3896 assert(type < BDRV_MAX_IOTYPE);
3897
3898 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003899 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003900 cookie->type = type;
3901}
3902
3903void
3904bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3905{
3906 assert(cookie->type < BDRV_MAX_IOTYPE);
3907
3908 bs->nr_bytes[cookie->type] += cookie->bytes;
3909 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003910 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003911}
3912
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003913int bdrv_img_create(const char *filename, const char *fmt,
3914 const char *base_filename, const char *base_fmt,
3915 char *options, uint64_t img_size, int flags)
3916{
3917 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003918 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003919 BlockDriverState *bs = NULL;
3920 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003921 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003922 int ret = 0;
3923
3924 /* Find driver and parse its options */
3925 drv = bdrv_find_format(fmt);
3926 if (!drv) {
3927 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003928 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003929 goto out;
3930 }
3931
3932 proto_drv = bdrv_find_protocol(filename);
3933 if (!proto_drv) {
3934 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003935 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003936 goto out;
3937 }
3938
3939 create_options = append_option_parameters(create_options,
3940 drv->create_options);
3941 create_options = append_option_parameters(create_options,
3942 proto_drv->create_options);
3943
3944 /* Create parameter list with default values */
3945 param = parse_option_parameters("", create_options, param);
3946
3947 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3948
3949 /* Parse -o options */
3950 if (options) {
3951 param = parse_option_parameters(options, create_options, param);
3952 if (param == NULL) {
3953 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003954 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003955 goto out;
3956 }
3957 }
3958
3959 if (base_filename) {
3960 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3961 base_filename)) {
3962 error_report("Backing file not supported for file format '%s'",
3963 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003964 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003965 goto out;
3966 }
3967 }
3968
3969 if (base_fmt) {
3970 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3971 error_report("Backing file format not supported for file "
3972 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003973 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003974 goto out;
3975 }
3976 }
3977
Jes Sorensen792da932010-12-16 13:52:17 +01003978 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3979 if (backing_file && backing_file->value.s) {
3980 if (!strcmp(filename, backing_file->value.s)) {
3981 error_report("Error: Trying to create an image with the "
3982 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003983 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003984 goto out;
3985 }
3986 }
3987
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003988 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3989 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003990 backing_drv = bdrv_find_format(backing_fmt->value.s);
3991 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003992 error_report("Unknown backing file format '%s'",
3993 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003994 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003995 goto out;
3996 }
3997 }
3998
3999 // The size for the image must always be specified, with one exception:
4000 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02004001 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4002 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004003 if (backing_file && backing_file->value.s) {
4004 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004005 char buf[32];
4006
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004007 bs = bdrv_new("");
4008
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004009 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004010 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004011 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004012 goto out;
4013 }
4014 bdrv_get_geometry(bs, &size);
4015 size *= 512;
4016
4017 snprintf(buf, sizeof(buf), "%" PRId64, size);
4018 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4019 } else {
4020 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004021 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004022 goto out;
4023 }
4024 }
4025
4026 printf("Formatting '%s', fmt=%s ", filename, fmt);
4027 print_option_parameters(param);
4028 puts("");
4029
4030 ret = bdrv_create(drv, filename, param);
4031
4032 if (ret < 0) {
4033 if (ret == -ENOTSUP) {
4034 error_report("Formatting or formatting option not supported for "
4035 "file format '%s'", fmt);
4036 } else if (ret == -EFBIG) {
4037 error_report("The image size is too large for file format '%s'",
4038 fmt);
4039 } else {
4040 error_report("%s: error while creating %s: %s", filename, fmt,
4041 strerror(-ret));
4042 }
4043 }
4044
4045out:
4046 free_option_parameters(create_options);
4047 free_option_parameters(param);
4048
4049 if (bs) {
4050 bdrv_delete(bs);
4051 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01004052
4053 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004054}
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004055
4056void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
4057 BlockDriverCompletionFunc *cb, void *opaque)
4058{
4059 BlockJob *job;
4060
4061 if (bs->job || bdrv_in_use(bs)) {
4062 return NULL;
4063 }
4064 bdrv_set_in_use(bs, 1);
4065
4066 job = g_malloc0(job_type->instance_size);
4067 job->job_type = job_type;
4068 job->bs = bs;
4069 job->cb = cb;
4070 job->opaque = opaque;
4071 bs->job = job;
4072 return job;
4073}
4074
4075void block_job_complete(BlockJob *job, int ret)
4076{
4077 BlockDriverState *bs = job->bs;
4078
4079 assert(bs->job == job);
4080 job->cb(job->opaque, ret);
4081 bs->job = NULL;
4082 g_free(job);
4083 bdrv_set_in_use(bs, 0);
4084}
4085
4086int block_job_set_speed(BlockJob *job, int64_t value)
4087{
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004088 int rc;
4089
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004090 if (!job->job_type->set_speed) {
4091 return -ENOTSUP;
4092 }
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004093 rc = job->job_type->set_speed(job, value);
4094 if (rc == 0) {
4095 job->speed = value;
4096 }
4097 return rc;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004098}
4099
4100void block_job_cancel(BlockJob *job)
4101{
4102 job->cancelled = true;
4103}
4104
4105bool block_job_is_cancelled(BlockJob *job)
4106{
4107 return job->cancelled;
4108}
Paolo Bonzini3e914652012-03-30 13:17:11 +02004109
4110void block_job_cancel_sync(BlockJob *job)
4111{
4112 BlockDriverState *bs = job->bs;
4113
4114 assert(bs->job == job);
4115 block_job_cancel(job);
4116 while (bs->job != NULL && bs->job->busy) {
4117 qemu_aio_wait();
4118 }
4119}