blob: 0fb188f763663ff2ef86446202d01b9ee328096a [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
/*
 * Sentinel return code: a synchronous wrapper stores NOT_DONE in its result
 * slot and polls until the asynchronous/coroutine side overwrites it.
 */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/*
 * Per-request flags taken by bdrv_co_do_readv() and bdrv_co_do_writev().
 * The values are distinct bits.
 * NOTE(review): presumably meant to be OR-ed together - confirm at call sites.
 */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
    BDRV_REQ_ZERO_WRITE = 0x2,
} BdrvRequestFlags;
55
/* Forward declarations of helpers defined later in this file. */

/* Device-model notification; called with load == true after an open. */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);

/* AIO emulation installed by bdrv_register() for drivers lacking native AIO. */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);

/* Coroutine emulation installed by bdrv_register() for drivers lacking it. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);

/* Coroutine read/write entry points taking BdrvRequestFlags. */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors);

/*
 * I/O throttling predicates.
 * NOTE(review): bdrv_exceed_io_limits() takes 'int64_t *wait' while the two
 * helpers above it take 'uint64_t *wait' - confirm the mismatch is intended.
 */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);
92
/* All named block devices; bdrv_new() appends to the tail of this list. */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* All registered block drivers; bdrv_register() inserts at the head. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
104
#ifdef _WIN32
/* Return non-zero if filename begins with "<ASCII letter>:". */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}

/*
 * Return 1 if filename is exactly a drive specifier ("d:") or starts with
 * one of the device namespace prefixes "\\.\" or "//./"; 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
#endif
124
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800125/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800126void bdrv_io_limits_disable(BlockDriverState *bs)
127{
128 bs->io_limits_enabled = false;
129
130 while (qemu_co_queue_next(&bs->throttled_reqs));
131
132 if (bs->block_timer) {
133 qemu_del_timer(bs->block_timer);
134 qemu_free_timer(bs->block_timer);
135 bs->block_timer = NULL;
136 }
137
138 bs->slice_start = 0;
139 bs->slice_end = 0;
140 bs->slice_time = 0;
141 memset(&bs->io_base, 0, sizeof(bs->io_base));
142}
143
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800144static void bdrv_block_timer(void *opaque)
145{
146 BlockDriverState *bs = opaque;
147
148 qemu_co_queue_next(&bs->throttled_reqs);
149}
150
151void bdrv_io_limits_enable(BlockDriverState *bs)
152{
153 qemu_co_queue_init(&bs->throttled_reqs);
154 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
155 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
156 bs->slice_start = qemu_get_clock_ns(vm_clock);
157 bs->slice_end = bs->slice_start + bs->slice_time;
158 memset(&bs->io_base, 0, sizeof(bs->io_base));
159 bs->io_limits_enabled = true;
160}
161
162bool bdrv_io_limits_enabled(BlockDriverState *bs)
163{
164 BlockIOLimit *io_limits = &bs->io_limits;
165 return io_limits->bps[BLOCK_IO_LIMIT_READ]
166 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
167 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
168 || io_limits->iops[BLOCK_IO_LIMIT_READ]
169 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
170 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
171}
172
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800173static void bdrv_io_limits_intercept(BlockDriverState *bs,
174 bool is_write, int nb_sectors)
175{
176 int64_t wait_time = -1;
177
178 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
179 qemu_co_queue_wait(&bs->throttled_reqs);
180 }
181
182 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
183 * throttled requests will not be dequeued until the current request is
184 * allowed to be serviced. So if the current request still exceeds the
185 * limits, it will be inserted to the head. All requests followed it will
186 * be still in throttled_reqs queue.
187 */
188
189 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
190 qemu_mod_timer(bs->block_timer,
191 wait_time + qemu_get_clock_ns(vm_clock));
192 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
193 }
194
195 qemu_co_queue_next(&bs->throttled_reqs);
196}
197
/*
 * Check if the path starts with "<protocol>:".
 *
 * Returns 1 if path contains a ':' anywhere, 0 otherwise. On Windows, drive
 * specifiers and device paths are never treated as protocols.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon != NULL;
}
210
/*
 * Return 1 if path is absolute, 0 otherwise.
 *
 * Everywhere, a leading '/' makes a path absolute. On Windows a leading '\\',
 * a drive prefix ("d:...") or a device path also count.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return first == '/' || first == '\\';
#else
    return first == '/';
#endif
}
223
/*
 * Build in dest (capacity dest_size bytes) the path of filename interpreted
 * relative to base_path.
 *
 * If filename is absolute it is copied as is. Otherwise the prefix of
 * base_path up to and including its last directory separator, or its first
 * ':' if that comes later (so URLs are supported), is copied and filename is
 * appended. Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (protocol), or start of base_path. */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* Position just past the last separator, or start of base_path. */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* The kept prefix ends at whichever of the two positions is later. */
    prefix_end = (after_sep > after_proto) ? after_sep : after_proto;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
267
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500268void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000269{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100270 /* Block drivers without coroutine functions need emulation */
271 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200272 bdrv->bdrv_co_readv = bdrv_co_readv_em;
273 bdrv->bdrv_co_writev = bdrv_co_writev_em;
274
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100275 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
276 * the block driver lacks aio we need to emulate that too.
277 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200278 if (!bdrv->bdrv_aio_readv) {
279 /* add AIO emulation layer */
280 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
281 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200282 }
bellard83f64092006-08-01 16:21:11 +0000283 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200284
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100285 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000286}
bellardb3380822004-03-14 21:38:54 +0000287
288/* create a new block device (by default it is empty) */
289BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000290{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100291 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000292
Anthony Liguori7267c092011-08-20 22:09:37 -0500293 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000294 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000295 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100296 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000297 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300298 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000299 return bs;
300}
301
bellardea2384d2004-08-01 21:59:26 +0000302BlockDriver *bdrv_find_format(const char *format_name)
303{
304 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100305 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
306 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000307 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100308 }
bellardea2384d2004-08-01 21:59:26 +0000309 }
310 return NULL;
311}
312
Markus Armbrustereb852012009-10-27 18:41:44 +0100313static int bdrv_is_whitelisted(BlockDriver *drv)
314{
315 static const char *whitelist[] = {
316 CONFIG_BDRV_WHITELIST
317 };
318 const char **p;
319
320 if (!whitelist[0])
321 return 1; /* no whitelist, anything goes */
322
323 for (p = whitelist; *p; p++) {
324 if (!strcmp(drv->format_name, *p)) {
325 return 1;
326 }
327 }
328 return 0;
329}
330
331BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
332{
333 BlockDriver *drv = bdrv_find_format(format_name);
334 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
335}
336
/* Argument/result bundle passed to bdrv_create_co_entry(). */
typedef struct CreateCo {
    BlockDriver *drv;               /* driver whose bdrv_create() is called */
    char *filename;                 /* image path handed to the driver */
    QEMUOptionParameter *options;   /* creation options handed to the driver */
    int ret;                        /* driver's return value, written by the
                                       coroutine entry point */
} CreateCo;
343
344static void coroutine_fn bdrv_create_co_entry(void *opaque)
345{
346 CreateCo *cco = opaque;
347 assert(cco->drv);
348
349 cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
350}
351
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200352int bdrv_create(BlockDriver *drv, const char* filename,
353 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000354{
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800355 int ret;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200356
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800357 Coroutine *co;
358 CreateCo cco = {
359 .drv = drv,
360 .filename = g_strdup(filename),
361 .options = options,
362 .ret = NOT_DONE,
363 };
364
365 if (!drv->bdrv_create) {
366 return -ENOTSUP;
367 }
368
369 if (qemu_in_coroutine()) {
370 /* Fast-path if already in coroutine context */
371 bdrv_create_co_entry(&cco);
372 } else {
373 co = qemu_coroutine_create(bdrv_create_co_entry);
374 qemu_coroutine_enter(co, &cco);
375 while (cco.ret == NOT_DONE) {
376 qemu_aio_wait();
377 }
378 }
379
380 ret = cco.ret;
381 g_free(cco.filename);
382
383 return ret;
bellardea2384d2004-08-01 21:59:26 +0000384}
385
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200386int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
387{
388 BlockDriver *drv;
389
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900390 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200391 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000392 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200393 }
394
395 return bdrv_create(drv, filename, options);
396}
397
#ifdef _WIN32
/*
 * Store the name of a newly created temporary file in filename.
 *
 * GetTempFileName() writes up to MAX_PATH characters into its output buffer
 * regardless of what the caller allocated, so the buffer must be at least
 * that large.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    /* Guard against overflowing a caller buffer smaller than MAX_PATH. */
    assert(size >= MAX_PATH);

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Store the name of a newly created temporary file in filename (capacity
 * size bytes). The file is created under $TMPDIR, or /tmp if that is unset,
 * and is left on disk, closed, for the caller to reopen.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; only close a valid descriptor. */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
bellardea2384d2004-08-01 21:59:26 +0000420
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200421/*
422 * Detect host devices. By convention, /dev/cdrom[N] is always
423 * recognized as a host CDROM.
424 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200425static BlockDriver *find_hdev_driver(const char *filename)
426{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200427 int score_max = 0, score;
428 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200429
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100430 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200431 if (d->bdrv_probe_device) {
432 score = d->bdrv_probe_device(filename);
433 if (score > score_max) {
434 score_max = score;
435 drv = d;
436 }
437 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200438 }
439
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200440 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200441}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200442
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900443BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200444{
445 BlockDriver *drv1;
446 char protocol[128];
447 int len;
448 const char *p;
449
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200450 /* TODO Drivers without bdrv_file_open must be specified explicitly */
451
Christoph Hellwig39508e72010-06-23 12:25:17 +0200452 /*
453 * XXX(hch): we really should not let host device detection
454 * override an explicit protocol specification, but moving this
455 * later breaks access to device names with colons in them.
456 * Thanks to the brain-dead persistent naming schemes on udev-
457 * based Linux systems those actually are quite common.
458 */
459 drv1 = find_hdev_driver(filename);
460 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200461 return drv1;
462 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200463
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000464 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200465 return bdrv_find_format("file");
466 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000467 p = strchr(filename, ':');
468 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200469 len = p - filename;
470 if (len > sizeof(protocol) - 1)
471 len = sizeof(protocol) - 1;
472 memcpy(protocol, filename, len);
473 protocol[len] = '\0';
474 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
475 if (drv1->protocol_name &&
476 !strcmp(drv1->protocol_name, protocol)) {
477 return drv1;
478 }
479 }
480 return NULL;
481}
482
Stefan Weilc98ac352010-07-21 21:51:51 +0200483static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000484{
bellard83f64092006-08-01 16:21:11 +0000485 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000486 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000487 uint8_t buf[2048];
488 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000489
Naphtali Spreif5edb012010-01-17 16:48:13 +0200490 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200491 if (ret < 0) {
492 *pdrv = NULL;
493 return ret;
494 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700495
Kevin Wolf08a00552010-06-01 18:37:31 +0200496 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
497 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700498 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200499 drv = bdrv_find_format("raw");
500 if (!drv) {
501 ret = -ENOENT;
502 }
503 *pdrv = drv;
504 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700505 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700506
bellard83f64092006-08-01 16:21:11 +0000507 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
508 bdrv_delete(bs);
509 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200510 *pdrv = NULL;
511 return ret;
bellard83f64092006-08-01 16:21:11 +0000512 }
513
bellardea2384d2004-08-01 21:59:26 +0000514 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200515 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100516 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000517 if (drv1->bdrv_probe) {
518 score = drv1->bdrv_probe(buf, ret, filename);
519 if (score > score_max) {
520 score_max = score;
521 drv = drv1;
522 }
bellardea2384d2004-08-01 21:59:26 +0000523 }
524 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200525 if (!drv) {
526 ret = -ENOENT;
527 }
528 *pdrv = drv;
529 return ret;
bellardea2384d2004-08-01 21:59:26 +0000530}
531
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100532/**
533 * Set the current 'total_sectors' value
534 */
535static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
536{
537 BlockDriver *drv = bs->drv;
538
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700539 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
540 if (bs->sg)
541 return 0;
542
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100543 /* query actual device if possible, otherwise just trust the hint */
544 if (drv->bdrv_getlength) {
545 int64_t length = drv->bdrv_getlength(bs);
546 if (length < 0) {
547 return length;
548 }
549 hint = length >> BDRV_SECTOR_BITS;
550 }
551
552 bs->total_sectors = hint;
553 return 0;
554}
555
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100556/**
557 * Set open flags for a given cache mode
558 *
559 * Return 0 on success, -1 if the cache mode was invalid.
560 */
561int bdrv_parse_cache_flags(const char *mode, int *flags)
562{
563 *flags &= ~BDRV_O_CACHE_MASK;
564
565 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
566 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100567 } else if (!strcmp(mode, "directsync")) {
568 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100569 } else if (!strcmp(mode, "writeback")) {
570 *flags |= BDRV_O_CACHE_WB;
571 } else if (!strcmp(mode, "unsafe")) {
572 *flags |= BDRV_O_CACHE_WB;
573 *flags |= BDRV_O_NO_FLUSH;
574 } else if (!strcmp(mode, "writethrough")) {
575 /* this is the default */
576 } else {
577 return -1;
578 }
579
580 return 0;
581}
582
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000583/**
584 * The copy-on-read flag is actually a reference count so multiple users may
585 * use the feature without worrying about clobbering its previous state.
586 * Copy-on-read stays enabled until all users have called to disable it.
587 */
588void bdrv_enable_copy_on_read(BlockDriverState *bs)
589{
590 bs->copy_on_read++;
591}
592
593void bdrv_disable_copy_on_read(BlockDriverState *bs)
594{
595 assert(bs->copy_on_read > 0);
596 bs->copy_on_read--;
597}
598
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200599/*
Kevin Wolf57915332010-04-14 15:24:50 +0200600 * Common part for opening disk images and files
601 */
602static int bdrv_open_common(BlockDriverState *bs, const char *filename,
603 int flags, BlockDriver *drv)
604{
605 int ret, open_flags;
606
607 assert(drv != NULL);
608
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100609 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
610
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200611 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100612 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200613 bs->encrypted = 0;
614 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100615 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200616 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100617 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200618 bs->buffer_alignment = 512;
619
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000620 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
621 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
622 bdrv_enable_copy_on_read(bs);
623 }
624
Kevin Wolf57915332010-04-14 15:24:50 +0200625 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100626 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200627
628 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
629 return -ENOTSUP;
630 }
631
632 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500633 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200634
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100635 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200636
637 /*
638 * Clear flags that are internal to the block layer before opening the
639 * image.
640 */
641 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
642
643 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200644 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200645 */
646 if (bs->is_temporary) {
647 open_flags |= BDRV_O_RDWR;
648 }
649
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100650 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
651
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200652 /* Open the image, either directly or using a protocol */
653 if (drv->bdrv_file_open) {
654 ret = drv->bdrv_file_open(bs, filename, open_flags);
655 } else {
656 ret = bdrv_file_open(&bs->file, filename, open_flags);
657 if (ret >= 0) {
658 ret = drv->bdrv_open(bs, open_flags);
659 }
660 }
661
Kevin Wolf57915332010-04-14 15:24:50 +0200662 if (ret < 0) {
663 goto free_and_fail;
664 }
665
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100666 ret = refresh_total_sectors(bs, bs->total_sectors);
667 if (ret < 0) {
668 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200669 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100670
Kevin Wolf57915332010-04-14 15:24:50 +0200671#ifndef _WIN32
672 if (bs->is_temporary) {
673 unlink(filename);
674 }
675#endif
676 return 0;
677
678free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200679 if (bs->file) {
680 bdrv_delete(bs->file);
681 bs->file = NULL;
682 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500683 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200684 bs->opaque = NULL;
685 bs->drv = NULL;
686 return ret;
687}
688
689/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200690 * Opens a file using a protocol (file, host_device, nbd, ...)
691 */
bellard83f64092006-08-01 16:21:11 +0000692int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000693{
bellard83f64092006-08-01 16:21:11 +0000694 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200695 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000696 int ret;
697
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900698 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200699 if (!drv) {
700 return -ENOENT;
701 }
702
bellard83f64092006-08-01 16:21:11 +0000703 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200704 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000705 if (ret < 0) {
706 bdrv_delete(bs);
707 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000708 }
aliguori71d07702009-03-03 17:37:16 +0000709 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000710 *pbs = bs;
711 return 0;
bellardea2384d2004-08-01 21:59:26 +0000712}
bellardfc01f7e2003-06-30 10:03:06 +0000713
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200714/*
715 * Opens a disk image (raw, qcow2, vmdk, ...)
716 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200717int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
718 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000719{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200720 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200721 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000722
bellard83f64092006-08-01 16:21:11 +0000723 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000724 BlockDriverState *bs1;
725 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000726 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200727 BlockDriver *bdrv_qcow2;
728 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200729 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000730
bellardea2384d2004-08-01 21:59:26 +0000731 /* if snapshot, we create a temporary backing file and open it
732 instead of opening 'filename' directly */
733
734 /* if there is a backing file, use it */
735 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200736 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000737 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000738 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000739 return ret;
bellardea2384d2004-08-01 21:59:26 +0000740 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200741 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000742
743 if (bs1->drv && bs1->drv->protocol_name)
744 is_protocol = 1;
745
bellardea2384d2004-08-01 21:59:26 +0000746 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000747
bellardea2384d2004-08-01 21:59:26 +0000748 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000749
750 /* Real path is meaningless for protocols */
751 if (is_protocol)
752 snprintf(backing_filename, sizeof(backing_filename),
753 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000754 else if (!realpath(filename, backing_filename))
755 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000756
Kevin Wolf91a073a2009-05-27 14:48:06 +0200757 bdrv_qcow2 = bdrv_find_format("qcow2");
758 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
759
Jes Sorensen3e829902010-05-27 16:20:30 +0200760 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200761 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
762 if (drv) {
763 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
764 drv->format_name);
765 }
766
767 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200768 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000769 if (ret < 0) {
770 return ret;
bellardea2384d2004-08-01 21:59:26 +0000771 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200772
bellardea2384d2004-08-01 21:59:26 +0000773 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200774 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000775 bs->is_temporary = 1;
776 }
bellard712e7872005-04-28 21:09:32 +0000777
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200778 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200779 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200780 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000781 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100782
aliguori51d7c002009-03-05 23:00:29 +0000783 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000784 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000785 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200786
787 /* Open the image */
788 ret = bdrv_open_common(bs, filename, flags, drv);
789 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100790 goto unlink_and_fail;
791 }
792
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200793 /* If there is a backing file, use it */
794 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
795 char backing_filename[PATH_MAX];
796 int back_flags;
797 BlockDriver *back_drv = NULL;
798
799 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000800
801 if (path_has_protocol(bs->backing_file)) {
802 pstrcpy(backing_filename, sizeof(backing_filename),
803 bs->backing_file);
804 } else {
805 path_combine(backing_filename, sizeof(backing_filename),
806 filename, bs->backing_file);
807 }
808
809 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200810 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000811 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200812
813 /* backing files always opened read-only */
814 back_flags =
815 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
816
817 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
818 if (ret < 0) {
819 bdrv_close(bs);
820 return ret;
821 }
822 if (bs->is_temporary) {
823 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
824 } else {
825 /* base image inherits from "parent" */
826 bs->backing_hd->keep_read_only = bs->keep_read_only;
827 }
828 }
829
830 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200831 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200832 }
833
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800834 /* throttling disk I/O limits */
835 if (bs->io_limits_enabled) {
836 bdrv_io_limits_enable(bs);
837 }
838
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200839 return 0;
840
841unlink_and_fail:
842 if (bs->is_temporary) {
843 unlink(filename);
844 }
845 return ret;
846}
847
bellardfc01f7e2003-06-30 10:03:06 +0000848void bdrv_close(BlockDriverState *bs)
849{
Liu Yuan80ccf932012-04-20 17:10:56 +0800850 bdrv_flush(bs);
bellard19cb3732006-08-19 11:45:59 +0000851 if (bs->drv) {
Paolo Bonzini3e914652012-03-30 13:17:11 +0200852 if (bs->job) {
853 block_job_cancel_sync(bs->job);
854 }
Kevin Wolf7094f122012-04-11 11:06:37 +0200855 bdrv_drain_all();
856
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200857 if (bs == bs_snapshots) {
858 bs_snapshots = NULL;
859 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100860 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000861 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100862 bs->backing_hd = NULL;
863 }
bellardea2384d2004-08-01 21:59:26 +0000864 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500865 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000866#ifdef _WIN32
867 if (bs->is_temporary) {
868 unlink(bs->filename);
869 }
bellard67b915a2004-03-31 23:37:16 +0000870#endif
bellardea2384d2004-08-01 21:59:26 +0000871 bs->opaque = NULL;
872 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000873 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +0200874 bs->backing_file[0] = '\0';
875 bs->backing_format[0] = '\0';
bellardb3380822004-03-14 21:38:54 +0000876
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200877 if (bs->file != NULL) {
Paolo Bonzini0ac93772012-05-08 16:51:44 +0200878 bdrv_delete(bs->file);
879 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200880 }
881
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200882 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000883 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800884
885 /*throttling disk I/O limits*/
886 if (bs->io_limits_enabled) {
887 bdrv_io_limits_disable(bs);
888 }
bellardb3380822004-03-14 21:38:54 +0000889}
890
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900891void bdrv_close_all(void)
892{
893 BlockDriverState *bs;
894
895 QTAILQ_FOREACH(bs, &bdrv_states, list) {
896 bdrv_close(bs);
897 }
898}
899
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000900/*
901 * Wait for pending requests to complete across all BlockDriverStates
902 *
903 * This function does not flush data to disk, use bdrv_flush_all() for that
904 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +0200905 *
906 * Note that completion of an asynchronous I/O operation can trigger any
907 * number of other I/O operations on other devices---for example a coroutine
908 * can be arbitrarily complex and a constant flow of I/O can come until the
909 * coroutine is complete. Because of this, it is not possible to have a
910 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000911 */
912void bdrv_drain_all(void)
913{
914 BlockDriverState *bs;
Zhi Yong Wu4c355d52012-04-12 14:00:57 +0200915 bool busy;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000916
Zhi Yong Wu4c355d52012-04-12 14:00:57 +0200917 do {
918 busy = qemu_aio_wait();
919
920 /* FIXME: We do not have timer support here, so this is effectively
921 * a busy wait.
922 */
923 QTAILQ_FOREACH(bs, &bdrv_states, list) {
924 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
925 qemu_co_queue_restart_all(&bs->throttled_reqs);
926 busy = true;
927 }
928 }
929 } while (busy);
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000930
931 /* If requests are still pending there is a bug somewhere */
932 QTAILQ_FOREACH(bs, &bdrv_states, list) {
933 assert(QLIST_EMPTY(&bs->tracked_requests));
934 assert(qemu_co_queue_empty(&bs->throttled_reqs));
935 }
936}
937
Ryan Harperd22b2f42011-03-29 20:51:47 -0500938/* make a BlockDriverState anonymous by removing from bdrv_state list.
939 Also, NULL terminate the device_name to prevent double remove */
940void bdrv_make_anon(BlockDriverState *bs)
941{
942 if (bs->device_name[0] != '\0') {
943 QTAILQ_REMOVE(&bdrv_states, bs, list);
944 }
945 bs->device_name[0] = '\0';
946}
947
Paolo Bonzinie023b2e2012-05-08 16:51:41 +0200948static void bdrv_rebind(BlockDriverState *bs)
949{
950 if (bs->drv && bs->drv->bdrv_rebind) {
951 bs->drv->bdrv_rebind(bs);
952 }
953}
954
Jeff Cody8802d1f2012-02-28 15:54:06 -0500955/*
956 * Add new bs contents at the top of an image chain while the chain is
957 * live, while keeping required fields on the top layer.
958 *
959 * This will modify the BlockDriverState fields, and swap contents
960 * between bs_new and bs_top. Both bs_new and bs_top are modified.
961 *
Jeff Codyf6801b82012-03-27 16:30:19 -0400962 * bs_new is required to be anonymous.
963 *
Jeff Cody8802d1f2012-02-28 15:54:06 -0500964 * This function does not create any image files.
965 */
966void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
967{
968 BlockDriverState tmp;
969
Jeff Codyf6801b82012-03-27 16:30:19 -0400970 /* bs_new must be anonymous */
971 assert(bs_new->device_name[0] == '\0');
Jeff Cody8802d1f2012-02-28 15:54:06 -0500972
973 tmp = *bs_new;
974
975 /* there are some fields that need to stay on the top layer: */
Paolo Bonzini3a389e72012-05-08 16:51:42 +0200976 tmp.open_flags = bs_top->open_flags;
Jeff Cody8802d1f2012-02-28 15:54:06 -0500977
978 /* dev info */
979 tmp.dev_ops = bs_top->dev_ops;
980 tmp.dev_opaque = bs_top->dev_opaque;
981 tmp.dev = bs_top->dev;
982 tmp.buffer_alignment = bs_top->buffer_alignment;
983 tmp.copy_on_read = bs_top->copy_on_read;
984
985 /* i/o timing parameters */
986 tmp.slice_time = bs_top->slice_time;
987 tmp.slice_start = bs_top->slice_start;
988 tmp.slice_end = bs_top->slice_end;
989 tmp.io_limits = bs_top->io_limits;
990 tmp.io_base = bs_top->io_base;
991 tmp.throttled_reqs = bs_top->throttled_reqs;
992 tmp.block_timer = bs_top->block_timer;
993 tmp.io_limits_enabled = bs_top->io_limits_enabled;
994
995 /* geometry */
996 tmp.cyls = bs_top->cyls;
997 tmp.heads = bs_top->heads;
998 tmp.secs = bs_top->secs;
999 tmp.translation = bs_top->translation;
1000
1001 /* r/w error */
1002 tmp.on_read_error = bs_top->on_read_error;
1003 tmp.on_write_error = bs_top->on_write_error;
1004
1005 /* i/o status */
1006 tmp.iostatus_enabled = bs_top->iostatus_enabled;
1007 tmp.iostatus = bs_top->iostatus;
1008
1009 /* keep the same entry in bdrv_states */
1010 pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
1011 tmp.list = bs_top->list;
1012
1013 /* The contents of 'tmp' will become bs_top, as we are
1014 * swapping bs_new and bs_top contents. */
1015 tmp.backing_hd = bs_new;
1016 pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
Jeff Codyf6801b82012-03-27 16:30:19 -04001017 bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
Jeff Cody8802d1f2012-02-28 15:54:06 -05001018
1019 /* swap contents of the fixed new bs and the current top */
1020 *bs_new = *bs_top;
1021 *bs_top = tmp;
1022
Jeff Codyf6801b82012-03-27 16:30:19 -04001023 /* device_name[] was carried over from the old bs_top. bs_new
1024 * shouldn't be in bdrv_states, so we need to make device_name[]
1025 * reflect the anonymity of bs_new
1026 */
1027 bs_new->device_name[0] = '\0';
1028
Jeff Cody8802d1f2012-02-28 15:54:06 -05001029 /* clear the copied fields in the new backing file */
1030 bdrv_detach_dev(bs_new, bs_new->dev);
1031
1032 qemu_co_queue_init(&bs_new->throttled_reqs);
1033 memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
1034 memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
1035 bdrv_iostatus_disable(bs_new);
1036
1037 /* we don't use bdrv_io_limits_disable() for this, because we don't want
1038 * to affect or delete the block_timer, as it has been moved to bs_top */
1039 bs_new->io_limits_enabled = false;
1040 bs_new->block_timer = NULL;
1041 bs_new->slice_time = 0;
1042 bs_new->slice_start = 0;
1043 bs_new->slice_end = 0;
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001044
1045 bdrv_rebind(bs_new);
1046 bdrv_rebind(bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05001047}
1048
bellardb3380822004-03-14 21:38:54 +00001049void bdrv_delete(BlockDriverState *bs)
1050{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001051 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02001052 assert(!bs->job);
1053 assert(!bs->in_use);
Markus Armbruster18846de2010-06-29 16:58:30 +02001054
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001055 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05001056 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00001057
bellardb3380822004-03-14 21:38:54 +00001058 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001059
Markus Armbrusterf9092b12010-06-25 10:33:39 +02001060 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -05001061 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00001062}
1063
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001064int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1065/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02001066{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001067 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02001068 return -EBUSY;
1069 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001070 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03001071 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +02001072 return 0;
1073}
1074
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001075/* TODO qdevified devices don't use this, remove when devices are qdevified */
1076void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02001077{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001078 if (bdrv_attach_dev(bs, dev) < 0) {
1079 abort();
1080 }
1081}
1082
1083void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1084/* TODO change to DeviceState *dev when all users are qdevified */
1085{
1086 assert(bs->dev == dev);
1087 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001088 bs->dev_ops = NULL;
1089 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +02001090 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02001091}
1092
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001093/* TODO change to return DeviceState * when all users are qdevified */
1094void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02001095{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001096 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02001097}
1098
Markus Armbruster0e49de52011-08-03 15:07:41 +02001099void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1100 void *opaque)
1101{
1102 bs->dev_ops = ops;
1103 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001104 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1105 bs_snapshots = NULL;
1106 }
Markus Armbruster0e49de52011-08-03 15:07:41 +02001107}
1108
Luiz Capitulino329c0a42012-01-25 16:59:43 -02001109void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1110 BlockQMPEventAction action, int is_read)
1111{
1112 QObject *data;
1113 const char *action_str;
1114
1115 switch (action) {
1116 case BDRV_ACTION_REPORT:
1117 action_str = "report";
1118 break;
1119 case BDRV_ACTION_IGNORE:
1120 action_str = "ignore";
1121 break;
1122 case BDRV_ACTION_STOP:
1123 action_str = "stop";
1124 break;
1125 default:
1126 abort();
1127 }
1128
1129 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1130 bdrv->device_name,
1131 action_str,
1132 is_read ? "read" : "write");
1133 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1134
1135 qobject_decref(data);
1136}
1137
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001138static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1139{
1140 QObject *data;
1141
1142 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1143 bdrv_get_device_name(bs), ejected);
1144 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1145
1146 qobject_decref(data);
1147}
1148
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001149static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02001150{
Markus Armbruster145feb12011-08-03 15:07:42 +02001151 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001152 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001153 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001154 if (tray_was_closed) {
1155 /* tray open */
1156 bdrv_emit_qmp_eject_event(bs, true);
1157 }
1158 if (load) {
1159 /* tray close */
1160 bdrv_emit_qmp_eject_event(bs, false);
1161 }
Markus Armbruster145feb12011-08-03 15:07:42 +02001162 }
1163}
1164
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001165bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1166{
1167 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1168}
1169
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01001170void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1171{
1172 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1173 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1174 }
1175}
1176
Markus Armbrustere4def802011-09-06 18:58:53 +02001177bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1178{
1179 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1180 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1181 }
1182 return false;
1183}
1184
Markus Armbruster145feb12011-08-03 15:07:42 +02001185static void bdrv_dev_resize_cb(BlockDriverState *bs)
1186{
1187 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1188 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02001189 }
1190}
1191
Markus Armbrusterf1076392011-09-06 18:58:46 +02001192bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1193{
1194 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1195 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1196 }
1197 return false;
1198}
1199
aliguorie97fc192009-04-21 23:11:50 +00001200/*
1201 * Run consistency checks on an image
1202 *
Kevin Wolfe076f332010-06-29 11:43:13 +02001203 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02001204 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02001205 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00001206 */
Kevin Wolfe076f332010-06-29 11:43:13 +02001207int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +00001208{
1209 if (bs->drv->bdrv_check == NULL) {
1210 return -ENOTSUP;
1211 }
1212
Kevin Wolfe076f332010-06-29 11:43:13 +02001213 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +02001214 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +00001215}
1216
Kevin Wolf8a426612010-07-16 17:17:01 +02001217#define COMMIT_BUF_SECTORS 2048
1218
bellard33e39632003-07-06 17:15:21 +00001219/* commit COW file into the raw image */
1220int bdrv_commit(BlockDriverState *bs)
1221{
bellard19cb3732006-08-19 11:45:59 +00001222 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +02001223 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001224 int64_t sector, total_sectors;
1225 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001226 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001227 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001228 char filename[1024];
1229 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +00001230
bellard19cb3732006-08-19 11:45:59 +00001231 if (!drv)
1232 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001233
1234 if (!bs->backing_hd) {
1235 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001236 }
1237
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001238 if (bs->backing_hd->keep_read_only) {
1239 return -EACCES;
1240 }
Kevin Wolfee181192010-08-05 13:05:22 +02001241
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00001242 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1243 return -EBUSY;
1244 }
1245
Kevin Wolfee181192010-08-05 13:05:22 +02001246 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001247 ro = bs->backing_hd->read_only;
1248 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1249 open_flags = bs->backing_hd->open_flags;
1250
1251 if (ro) {
1252 /* re-open as RW */
1253 bdrv_delete(bs->backing_hd);
1254 bs->backing_hd = NULL;
1255 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001256 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1257 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001258 if (rw_ret < 0) {
1259 bdrv_delete(bs_rw);
1260 /* try to re-open read-only */
1261 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001262 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1263 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001264 if (ret < 0) {
1265 bdrv_delete(bs_ro);
1266 /* drive not functional anymore */
1267 bs->drv = NULL;
1268 return ret;
1269 }
1270 bs->backing_hd = bs_ro;
1271 return rw_ret;
1272 }
1273 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001274 }
bellardea2384d2004-08-01 21:59:26 +00001275
Jan Kiszka6ea44302009-11-30 18:21:19 +01001276 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001277 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001278
Kevin Wolf8a426612010-07-16 17:17:01 +02001279 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001280 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001281
1282 if (bdrv_read(bs, sector, buf, n) != 0) {
1283 ret = -EIO;
1284 goto ro_cleanup;
1285 }
1286
1287 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1288 ret = -EIO;
1289 goto ro_cleanup;
1290 }
bellardea2384d2004-08-01 21:59:26 +00001291 }
1292 }
bellard95389c82005-12-18 18:28:15 +00001293
Christoph Hellwig1d449522010-01-17 12:32:30 +01001294 if (drv->bdrv_make_empty) {
1295 ret = drv->bdrv_make_empty(bs);
1296 bdrv_flush(bs);
1297 }
bellard95389c82005-12-18 18:28:15 +00001298
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001299 /*
1300 * Make sure all data we wrote to the backing device is actually
1301 * stable on disk.
1302 */
1303 if (bs->backing_hd)
1304 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001305
1306ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001307 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001308
1309 if (ro) {
1310 /* re-open as RO */
1311 bdrv_delete(bs->backing_hd);
1312 bs->backing_hd = NULL;
1313 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001314 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1315 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001316 if (ret < 0) {
1317 bdrv_delete(bs_ro);
1318 /* drive not functional anymore */
1319 bs->drv = NULL;
1320 return ret;
1321 }
1322 bs->backing_hd = bs_ro;
1323 bs->backing_hd->keep_read_only = 0;
1324 }
1325
Christoph Hellwig1d449522010-01-17 12:32:30 +01001326 return ret;
bellard33e39632003-07-06 17:15:21 +00001327}
1328
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001329int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001330{
1331 BlockDriverState *bs;
1332
1333 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001334 int ret = bdrv_commit(bs);
1335 if (ret < 0) {
1336 return ret;
1337 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001338 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001339 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001340}
1341
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001342struct BdrvTrackedRequest {
1343 BlockDriverState *bs;
1344 int64_t sector_num;
1345 int nb_sectors;
1346 bool is_write;
1347 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001348 Coroutine *co; /* owner, used for deadlock detection */
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001349 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001350};
1351
1352/**
1353 * Remove an active request from the tracked requests list
1354 *
1355 * This function should be called when a tracked request is completing.
1356 */
1357static void tracked_request_end(BdrvTrackedRequest *req)
1358{
1359 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001360 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001361}
1362
1363/**
1364 * Add an active request to the tracked requests list
1365 */
1366static void tracked_request_begin(BdrvTrackedRequest *req,
1367 BlockDriverState *bs,
1368 int64_t sector_num,
1369 int nb_sectors, bool is_write)
1370{
1371 *req = (BdrvTrackedRequest){
1372 .bs = bs,
1373 .sector_num = sector_num,
1374 .nb_sectors = nb_sectors,
1375 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001376 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001377 };
1378
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001379 qemu_co_queue_init(&req->wait_queue);
1380
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001381 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1382}
1383
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001384/**
1385 * Round a region to cluster boundaries
1386 */
1387static void round_to_clusters(BlockDriverState *bs,
1388 int64_t sector_num, int nb_sectors,
1389 int64_t *cluster_sector_num,
1390 int *cluster_nb_sectors)
1391{
1392 BlockDriverInfo bdi;
1393
1394 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1395 *cluster_sector_num = sector_num;
1396 *cluster_nb_sectors = nb_sectors;
1397 } else {
1398 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1399 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1400 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1401 nb_sectors, c);
1402 }
1403}
1404
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001405static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1406 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001407 /* aaaa bbbb */
1408 if (sector_num >= req->sector_num + req->nb_sectors) {
1409 return false;
1410 }
1411 /* bbbb aaaa */
1412 if (req->sector_num >= sector_num + nb_sectors) {
1413 return false;
1414 }
1415 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001416}
1417
1418static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1419 int64_t sector_num, int nb_sectors)
1420{
1421 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001422 int64_t cluster_sector_num;
1423 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001424 bool retry;
1425
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001426 /* If we touch the same cluster it counts as an overlap. This guarantees
1427 * that allocating writes will be serialized and not race with each other
1428 * for the same cluster. For example, in copy-on-read it ensures that the
1429 * CoR read and write operations are atomic and guest writes cannot
1430 * interleave between them.
1431 */
1432 round_to_clusters(bs, sector_num, nb_sectors,
1433 &cluster_sector_num, &cluster_nb_sectors);
1434
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001435 do {
1436 retry = false;
1437 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001438 if (tracked_request_overlaps(req, cluster_sector_num,
1439 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001440 /* Hitting this means there was a reentrant request, for
1441 * example, a block driver issuing nested requests. This must
1442 * never happen since it means deadlock.
1443 */
1444 assert(qemu_coroutine_self() != req->co);
1445
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001446 qemu_co_queue_wait(&req->wait_queue);
1447 retry = true;
1448 break;
1449 }
1450 }
1451 } while (retry);
1452}
1453
Kevin Wolf756e6732010-01-12 12:55:17 +01001454/*
1455 * Return values:
1456 * 0 - success
1457 * -EINVAL - backing format specified, but no file
1458 * -ENOSPC - can't update the backing file because no space is left in the
1459 * image file header
1460 * -ENOTSUP - format driver doesn't support changing the backing file
1461 */
1462int bdrv_change_backing_file(BlockDriverState *bs,
1463 const char *backing_file, const char *backing_fmt)
1464{
1465 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02001466 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01001467
Paolo Bonzini5f377792012-04-12 14:01:01 +02001468 /* Backing file format doesn't make sense without a backing file */
1469 if (backing_fmt && !backing_file) {
1470 return -EINVAL;
1471 }
1472
Kevin Wolf756e6732010-01-12 12:55:17 +01001473 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02001474 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01001475 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02001476 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01001477 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02001478
1479 if (ret == 0) {
1480 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1481 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1482 }
1483 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01001484}
1485
aliguori71d07702009-03-03 17:37:16 +00001486static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1487 size_t size)
1488{
1489 int64_t len;
1490
1491 if (!bdrv_is_inserted(bs))
1492 return -ENOMEDIUM;
1493
1494 if (bs->growable)
1495 return 0;
1496
1497 len = bdrv_getlength(bs);
1498
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001499 if (offset < 0)
1500 return -EIO;
1501
1502 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001503 return -EIO;
1504
1505 return 0;
1506}
1507
1508static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1509 int nb_sectors)
1510{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001511 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1512 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001513}
1514
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001515typedef struct RwCo {
1516 BlockDriverState *bs;
1517 int64_t sector_num;
1518 int nb_sectors;
1519 QEMUIOVector *qiov;
1520 bool is_write;
1521 int ret;
1522} RwCo;
1523
1524static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1525{
1526 RwCo *rwco = opaque;
1527
1528 if (!rwco->is_write) {
1529 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001530 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001531 } else {
1532 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001533 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001534 }
1535}
1536
1537/*
1538 * Process a synchronous request using coroutines
1539 */
1540static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1541 int nb_sectors, bool is_write)
1542{
1543 QEMUIOVector qiov;
1544 struct iovec iov = {
1545 .iov_base = (void *)buf,
1546 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1547 };
1548 Coroutine *co;
1549 RwCo rwco = {
1550 .bs = bs,
1551 .sector_num = sector_num,
1552 .nb_sectors = nb_sectors,
1553 .qiov = &qiov,
1554 .is_write = is_write,
1555 .ret = NOT_DONE,
1556 };
1557
1558 qemu_iovec_init_external(&qiov, &iov, 1);
1559
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001560 /**
1561 * In sync call context, when the vcpu is blocked, this throttling timer
1562 * will not fire; so the I/O throttling function has to be disabled here
1563 * if it has been enabled.
1564 */
1565 if (bs->io_limits_enabled) {
1566 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1567 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1568 bdrv_io_limits_disable(bs);
1569 }
1570
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001571 if (qemu_in_coroutine()) {
1572 /* Fast-path if already in coroutine context */
1573 bdrv_rw_co_entry(&rwco);
1574 } else {
1575 co = qemu_coroutine_create(bdrv_rw_co_entry);
1576 qemu_coroutine_enter(co, &rwco);
1577 while (rwco.ret == NOT_DONE) {
1578 qemu_aio_wait();
1579 }
1580 }
1581 return rwco.ret;
1582}
1583
bellard19cb3732006-08-19 11:45:59 +00001584/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001585int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001586 uint8_t *buf, int nb_sectors)
1587{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001588 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001589}
1590
Paolo Bonzini71df14f2012-04-12 14:01:04 +02001591#define BITS_PER_LONG (sizeof(unsigned long) * 8)
1592
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001593static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001594 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001595{
1596 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001597 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001598
Jan Kiszka6ea44302009-11-30 18:21:19 +01001599 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001600 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001601
1602 for (; start <= end; start++) {
Paolo Bonzini71df14f2012-04-12 14:01:04 +02001603 idx = start / BITS_PER_LONG;
1604 bit = start % BITS_PER_LONG;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001605 val = bs->dirty_bitmap[idx];
1606 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001607 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001608 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001609 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001610 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001611 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001612 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001613 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001614 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001615 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001616 }
1617 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001618 }
1619}
1620
ths5fafdf22007-09-16 21:08:06 +00001621/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001622 -EIO generic I/O error (may happen for all errors)
1623 -ENOMEDIUM No media inserted.
1624 -EINVAL Invalid sector number or nb_sectors
1625 -EACCES Trying to write a read-only device
1626*/
ths5fafdf22007-09-16 21:08:06 +00001627int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001628 const uint8_t *buf, int nb_sectors)
1629{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001630 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001631}
1632
aliguorieda578e2009-03-12 19:57:16 +00001633int bdrv_pread(BlockDriverState *bs, int64_t offset,
1634 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001635{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001636 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001637 int len, nb_sectors, count;
1638 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001639 int ret;
bellard83f64092006-08-01 16:21:11 +00001640
1641 count = count1;
1642 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001643 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001644 if (len > count)
1645 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001646 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001647 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001648 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1649 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001650 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001651 count -= len;
1652 if (count == 0)
1653 return count1;
1654 sector_num++;
1655 buf += len;
1656 }
1657
1658 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001659 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001660 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001661 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1662 return ret;
bellard83f64092006-08-01 16:21:11 +00001663 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001664 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001665 buf += len;
1666 count -= len;
1667 }
1668
1669 /* add data from the last sector */
1670 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001671 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1672 return ret;
bellard83f64092006-08-01 16:21:11 +00001673 memcpy(buf, tmp_buf, count);
1674 }
1675 return count1;
1676}
1677
aliguorieda578e2009-03-12 19:57:16 +00001678int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1679 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001680{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001681 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001682 int len, nb_sectors, count;
1683 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001684 int ret;
bellard83f64092006-08-01 16:21:11 +00001685
1686 count = count1;
1687 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001688 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001689 if (len > count)
1690 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001691 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001692 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001693 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1694 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001695 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001696 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1697 return ret;
bellard83f64092006-08-01 16:21:11 +00001698 count -= len;
1699 if (count == 0)
1700 return count1;
1701 sector_num++;
1702 buf += len;
1703 }
1704
1705 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001706 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001707 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001708 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1709 return ret;
bellard83f64092006-08-01 16:21:11 +00001710 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001711 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001712 buf += len;
1713 count -= len;
1714 }
1715
1716 /* add data from the last sector */
1717 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001718 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1719 return ret;
bellard83f64092006-08-01 16:21:11 +00001720 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001721 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1722 return ret;
bellard83f64092006-08-01 16:21:11 +00001723 }
1724 return count1;
1725}
bellard83f64092006-08-01 16:21:11 +00001726
Kevin Wolff08145f2010-06-16 16:38:15 +02001727/*
1728 * Writes to the file and ensures that no writes are reordered across this
1729 * request (acts as a barrier)
1730 *
1731 * Returns 0 on success, -errno in error cases.
1732 */
1733int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1734 const void *buf, int count)
1735{
1736 int ret;
1737
1738 ret = bdrv_pwrite(bs, offset, buf, count);
1739 if (ret < 0) {
1740 return ret;
1741 }
1742
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001743 /* No flush needed for cache modes that use O_DSYNC */
1744 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001745 bdrv_flush(bs);
1746 }
1747
1748 return 0;
1749}
1750
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001751static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001752 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1753{
1754 /* Perform I/O through a temporary buffer so that users who scribble over
1755 * their read buffer while the operation is in progress do not end up
1756 * modifying the image file. This is critical for zero-copy guest I/O
1757 * where anything might happen inside guest memory.
1758 */
1759 void *bounce_buffer;
1760
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001761 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00001762 struct iovec iov;
1763 QEMUIOVector bounce_qiov;
1764 int64_t cluster_sector_num;
1765 int cluster_nb_sectors;
1766 size_t skip_bytes;
1767 int ret;
1768
1769 /* Cover entire cluster so no additional backing file I/O is required when
1770 * allocating cluster in the image file.
1771 */
1772 round_to_clusters(bs, sector_num, nb_sectors,
1773 &cluster_sector_num, &cluster_nb_sectors);
1774
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001775 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1776 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001777
1778 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1779 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1780 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1781
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001782 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1783 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001784 if (ret < 0) {
1785 goto err;
1786 }
1787
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001788 if (drv->bdrv_co_write_zeroes &&
1789 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01001790 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
1791 cluster_nb_sectors);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001792 } else {
1793 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001794 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001795 }
1796
Stefan Hajnocziab185922011-11-17 13:40:31 +00001797 if (ret < 0) {
1798 /* It might be okay to ignore write errors for guest requests. If this
1799 * is a deliberate copy-on-read then we don't want to ignore the error.
1800 * Simply report it in all cases.
1801 */
1802 goto err;
1803 }
1804
1805 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1806 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1807 nb_sectors * BDRV_SECTOR_SIZE);
1808
1809err:
1810 qemu_vfree(bounce_buffer);
1811 return ret;
1812}
1813
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001814/*
1815 * Handle a read request in coroutine context
1816 */
1817static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001818 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1819 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001820{
1821 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001822 BdrvTrackedRequest req;
1823 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001824
Kevin Wolfda1fa912011-07-14 17:27:13 +02001825 if (!drv) {
1826 return -ENOMEDIUM;
1827 }
1828 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1829 return -EIO;
1830 }
1831
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001832 /* throttling disk read I/O */
1833 if (bs->io_limits_enabled) {
1834 bdrv_io_limits_intercept(bs, false, nb_sectors);
1835 }
1836
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001837 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001838 flags |= BDRV_REQ_COPY_ON_READ;
1839 }
1840 if (flags & BDRV_REQ_COPY_ON_READ) {
1841 bs->copy_on_read_in_flight++;
1842 }
1843
1844 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001845 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1846 }
1847
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001848 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001849
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001850 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00001851 int pnum;
1852
1853 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1854 if (ret < 0) {
1855 goto out;
1856 }
1857
1858 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001859 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001860 goto out;
1861 }
1862 }
1863
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001864 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001865
1866out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001867 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001868
1869 if (flags & BDRV_REQ_COPY_ON_READ) {
1870 bs->copy_on_read_in_flight--;
1871 }
1872
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001873 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001874}
1875
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001876int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001877 int nb_sectors, QEMUIOVector *qiov)
1878{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001879 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001880
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001881 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1882}
1883
1884int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1885 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1886{
1887 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1888
1889 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1890 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001891}
1892
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001893static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1894 int64_t sector_num, int nb_sectors)
1895{
1896 BlockDriver *drv = bs->drv;
1897 QEMUIOVector qiov;
1898 struct iovec iov;
1899 int ret;
1900
Kevin Wolf621f0582012-03-20 15:12:58 +01001901 /* TODO Emulate only part of misaligned requests instead of letting block
1902 * drivers return -ENOTSUP and emulate everything */
1903
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001904 /* First try the efficient write zeroes operation */
1905 if (drv->bdrv_co_write_zeroes) {
Kevin Wolf621f0582012-03-20 15:12:58 +01001906 ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1907 if (ret != -ENOTSUP) {
1908 return ret;
1909 }
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001910 }
1911
1912 /* Fall back to bounce buffer if write zeroes is unsupported */
1913 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1914 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1915 memset(iov.iov_base, 0, iov.iov_len);
1916 qemu_iovec_init_external(&qiov, &iov, 1);
1917
1918 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1919
1920 qemu_vfree(iov.iov_base);
1921 return ret;
1922}
1923
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001924/*
1925 * Handle a write request in coroutine context
1926 */
1927static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001928 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1929 BdrvRequestFlags flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001930{
1931 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001932 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001933 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001934
1935 if (!bs->drv) {
1936 return -ENOMEDIUM;
1937 }
1938 if (bs->read_only) {
1939 return -EACCES;
1940 }
1941 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1942 return -EIO;
1943 }
1944
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001945 /* throttling disk write I/O */
1946 if (bs->io_limits_enabled) {
1947 bdrv_io_limits_intercept(bs, true, nb_sectors);
1948 }
1949
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001950 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001951 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1952 }
1953
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001954 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1955
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001956 if (flags & BDRV_REQ_ZERO_WRITE) {
1957 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1958 } else {
1959 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1960 }
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001961
Kevin Wolfda1fa912011-07-14 17:27:13 +02001962 if (bs->dirty_bitmap) {
1963 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1964 }
1965
1966 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1967 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1968 }
1969
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001970 tracked_request_end(&req);
1971
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001972 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001973}
1974
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001975int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1976 int nb_sectors, QEMUIOVector *qiov)
1977{
1978 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1979
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001980 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
1981}
1982
1983int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
1984 int64_t sector_num, int nb_sectors)
1985{
1986 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1987
1988 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
1989 BDRV_REQ_ZERO_WRITE);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001990}
1991
bellard83f64092006-08-01 16:21:11 +00001992/**
bellard83f64092006-08-01 16:21:11 +00001993 * Truncate file to 'offset' bytes (needed only for file protocols)
1994 */
1995int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1996{
1997 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001998 int ret;
bellard83f64092006-08-01 16:21:11 +00001999 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002000 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00002001 if (!drv->bdrv_truncate)
2002 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02002003 if (bs->read_only)
2004 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02002005 if (bdrv_in_use(bs))
2006 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002007 ret = drv->bdrv_truncate(bs, offset);
2008 if (ret == 0) {
2009 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02002010 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002011 }
2012 return ret;
bellard83f64092006-08-01 16:21:11 +00002013}
2014
2015/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08002016 * Length of a allocated file in bytes. Sparse files are counted by actual
2017 * allocated space. Return < 0 if error or unknown.
2018 */
2019int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2020{
2021 BlockDriver *drv = bs->drv;
2022 if (!drv) {
2023 return -ENOMEDIUM;
2024 }
2025 if (drv->bdrv_get_allocated_file_size) {
2026 return drv->bdrv_get_allocated_file_size(bs);
2027 }
2028 if (bs->file) {
2029 return bdrv_get_allocated_file_size(bs->file);
2030 }
2031 return -ENOTSUP;
2032}
2033
2034/**
bellard83f64092006-08-01 16:21:11 +00002035 * Length of a file in bytes. Return < 0 if error or unknown.
2036 */
2037int64_t bdrv_getlength(BlockDriverState *bs)
2038{
2039 BlockDriver *drv = bs->drv;
2040 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002041 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002042
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002043 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01002044 if (drv->bdrv_getlength) {
2045 return drv->bdrv_getlength(bs);
2046 }
bellard83f64092006-08-01 16:21:11 +00002047 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01002048 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00002049}
2050
bellard19cb3732006-08-19 11:45:59 +00002051/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00002052void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00002053{
bellard19cb3732006-08-19 11:45:59 +00002054 int64_t length;
2055 length = bdrv_getlength(bs);
2056 if (length < 0)
2057 length = 0;
2058 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01002059 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00002060 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00002061}
bellardcf989512004-02-16 21:56:36 +00002062
/*
 * One entry of the MS-DOS partition table, overlaid by guess_disk_lchs() on
 * the first sector of the disk. The struct is packed so that it can be
 * mapped directly onto the raw sector bytes.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00002075
2076/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
2077static int guess_disk_lchs(BlockDriverState *bs,
2078 int *pcylinders, int *pheads, int *psectors)
2079{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002080 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00002081 int ret, i, heads, sectors, cylinders;
2082 struct partition *p;
2083 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00002084 uint64_t nb_sectors;
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002085 bool enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00002086
2087 bdrv_get_geometry(bs, &nb_sectors);
2088
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002089 /**
2090 * The function will be invoked during startup not only in sync I/O mode,
2091 * but also in async I/O mode. So the I/O throttling function has to
2092 * be disabled temporarily here, not permanently.
2093 */
2094 enabled = bs->io_limits_enabled;
2095 bs->io_limits_enabled = false;
aliguorif3d54fc2008-11-25 21:50:24 +00002096 ret = bdrv_read(bs, 0, buf, 1);
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002097 bs->io_limits_enabled = enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00002098 if (ret < 0)
2099 return -1;
2100 /* test msdos magic */
2101 if (buf[510] != 0x55 || buf[511] != 0xaa)
2102 return -1;
2103 for(i = 0; i < 4; i++) {
2104 p = ((struct partition *)(buf + 0x1be)) + i;
2105 nr_sects = le32_to_cpu(p->nr_sects);
2106 if (nr_sects && p->end_head) {
2107 /* We make the assumption that the partition terminates on
2108 a cylinder boundary */
2109 heads = p->end_head + 1;
2110 sectors = p->end_sector & 63;
2111 if (sectors == 0)
2112 continue;
2113 cylinders = nb_sectors / (heads * sectors);
2114 if (cylinders < 1 || cylinders > 16383)
2115 continue;
2116 *pheads = heads;
2117 *psectors = sectors;
2118 *pcylinders = cylinders;
2119#if 0
2120 printf("guessed geometry: LCHS=%d %d %d\n",
2121 cylinders, heads, sectors);
2122#endif
2123 return 0;
2124 }
2125 }
2126 return -1;
2127}
2128
2129void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2130{
2131 int translation, lba_detected = 0;
2132 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00002133 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00002134
2135 /* if a geometry hint is available, use it */
2136 bdrv_get_geometry(bs, &nb_sectors);
2137 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2138 translation = bdrv_get_translation_hint(bs);
2139 if (cylinders != 0) {
2140 *pcyls = cylinders;
2141 *pheads = heads;
2142 *psecs = secs;
2143 } else {
2144 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2145 if (heads > 16) {
2146 /* if heads > 16, it means that a BIOS LBA
2147 translation was active, so the default
2148 hardware geometry is OK */
2149 lba_detected = 1;
2150 goto default_geometry;
2151 } else {
2152 *pcyls = cylinders;
2153 *pheads = heads;
2154 *psecs = secs;
2155 /* disable any translation to be in sync with
2156 the logical geometry */
2157 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2158 bdrv_set_translation_hint(bs,
2159 BIOS_ATA_TRANSLATION_NONE);
2160 }
2161 }
2162 } else {
2163 default_geometry:
2164 /* if no geometry, use a standard physical disk geometry */
2165 cylinders = nb_sectors / (16 * 63);
2166
2167 if (cylinders > 16383)
2168 cylinders = 16383;
2169 else if (cylinders < 2)
2170 cylinders = 2;
2171 *pcyls = cylinders;
2172 *pheads = 16;
2173 *psecs = 63;
2174 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2175 if ((*pcyls * *pheads) <= 131072) {
2176 bdrv_set_translation_hint(bs,
2177 BIOS_ATA_TRANSLATION_LARGE);
2178 } else {
2179 bdrv_set_translation_hint(bs,
2180 BIOS_ATA_TRANSLATION_LBA);
2181 }
2182 }
2183 }
2184 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2185 }
2186}
2187
ths5fafdf22007-09-16 21:08:06 +00002188void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002189 int cyls, int heads, int secs)
2190{
2191 bs->cyls = cyls;
2192 bs->heads = heads;
2193 bs->secs = secs;
2194}
2195
bellard46d47672004-11-16 01:45:27 +00002196void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2197{
2198 bs->translation = translation;
2199}
2200
ths5fafdf22007-09-16 21:08:06 +00002201void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002202 int *pcyls, int *pheads, int *psecs)
2203{
2204 *pcyls = bs->cyls;
2205 *pheads = bs->heads;
2206 *psecs = bs->secs;
2207}
2208
Zhi Yong Wu0563e192011-11-03 16:57:25 +08002209/* throttling disk io limits */
2210void bdrv_set_io_limits(BlockDriverState *bs,
2211 BlockIOLimit *io_limits)
2212{
2213 bs->io_limits = *io_limits;
2214 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2215}
2216
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002217/* Recognize floppy formats */
2218typedef struct FDFormat {
2219 FDriveType drive;
2220 uint8_t last_sect;
2221 uint8_t max_track;
2222 uint8_t max_head;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002223 FDriveRate rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002224} FDFormat;
2225
2226static const FDFormat fd_formats[] = {
2227 /* First entry is default format */
2228 /* 1.44 MB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002229 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2230 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2231 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2232 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2233 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2234 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2235 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2236 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002237 /* 2.88 MB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002238 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2239 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2240 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2241 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2242 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002243 /* 720 kB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002244 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2245 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2246 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2247 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2248 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2249 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002250 /* 1.2 MB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002251 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2252 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2253 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2254 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2255 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002256 /* 720 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002257 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2258 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002259 /* 360 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002260 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2261 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2262 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2263 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002264 /* 320 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002265 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2266 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002267 /* 360 kB must match 5"1/4 better than 3"1/2... */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002268 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002269 /* end */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002270 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002271};
2272
2273void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2274 int *max_track, int *last_sect,
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002275 FDriveType drive_in, FDriveType *drive,
2276 FDriveRate *rate)
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002277{
2278 const FDFormat *parse;
2279 uint64_t nb_sectors, size;
2280 int i, first_match, match;
2281
2282 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2283 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2284 /* User defined disk */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002285 *rate = FDRIVE_RATE_500K;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002286 } else {
2287 bdrv_get_geometry(bs, &nb_sectors);
2288 match = -1;
2289 first_match = -1;
2290 for (i = 0; ; i++) {
2291 parse = &fd_formats[i];
2292 if (parse->drive == FDRIVE_DRV_NONE) {
2293 break;
2294 }
2295 if (drive_in == parse->drive ||
2296 drive_in == FDRIVE_DRV_NONE) {
2297 size = (parse->max_head + 1) * parse->max_track *
2298 parse->last_sect;
2299 if (nb_sectors == size) {
2300 match = i;
2301 break;
2302 }
2303 if (first_match == -1) {
2304 first_match = i;
2305 }
2306 }
2307 }
2308 if (match == -1) {
2309 if (first_match == -1) {
2310 match = 1;
2311 } else {
2312 match = first_match;
2313 }
2314 parse = &fd_formats[match];
2315 }
2316 *nb_heads = parse->max_head + 1;
2317 *max_track = parse->max_track;
2318 *last_sect = parse->last_sect;
2319 *drive = parse->drive;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002320 *rate = parse->rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002321 }
2322}
2323
bellard46d47672004-11-16 01:45:27 +00002324int bdrv_get_translation_hint(BlockDriverState *bs)
2325{
2326 return bs->translation;
2327}
2328
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002329void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2330 BlockErrorAction on_write_error)
2331{
2332 bs->on_read_error = on_read_error;
2333 bs->on_write_error = on_write_error;
2334}
2335
2336BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2337{
2338 return is_read ? bs->on_read_error : bs->on_write_error;
2339}
2340
bellardb3380822004-03-14 21:38:54 +00002341int bdrv_is_read_only(BlockDriverState *bs)
2342{
2343 return bs->read_only;
2344}
2345
ths985a03b2007-12-24 16:10:43 +00002346int bdrv_is_sg(BlockDriverState *bs)
2347{
2348 return bs->sg;
2349}
2350
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002351int bdrv_enable_write_cache(BlockDriverState *bs)
2352{
2353 return bs->enable_write_cache;
2354}
2355
bellardea2384d2004-08-01 21:59:26 +00002356int bdrv_is_encrypted(BlockDriverState *bs)
2357{
2358 if (bs->backing_hd && bs->backing_hd->encrypted)
2359 return 1;
2360 return bs->encrypted;
2361}
2362
aliguoric0f4ce72009-03-05 23:01:01 +00002363int bdrv_key_required(BlockDriverState *bs)
2364{
2365 BlockDriverState *backing_hd = bs->backing_hd;
2366
2367 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2368 return 1;
2369 return (bs->encrypted && !bs->valid_key);
2370}
2371
bellardea2384d2004-08-01 21:59:26 +00002372int bdrv_set_key(BlockDriverState *bs, const char *key)
2373{
2374 int ret;
2375 if (bs->backing_hd && bs->backing_hd->encrypted) {
2376 ret = bdrv_set_key(bs->backing_hd, key);
2377 if (ret < 0)
2378 return ret;
2379 if (!bs->encrypted)
2380 return 0;
2381 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002382 if (!bs->encrypted) {
2383 return -EINVAL;
2384 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2385 return -ENOMEDIUM;
2386 }
aliguoric0f4ce72009-03-05 23:01:01 +00002387 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002388 if (ret < 0) {
2389 bs->valid_key = 0;
2390 } else if (!bs->valid_key) {
2391 bs->valid_key = 1;
2392 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002393 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002394 }
aliguoric0f4ce72009-03-05 23:01:01 +00002395 return ret;
bellardea2384d2004-08-01 21:59:26 +00002396}
2397
2398void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2399{
bellard19cb3732006-08-19 11:45:59 +00002400 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002401 buf[0] = '\0';
2402 } else {
2403 pstrcpy(buf, buf_size, bs->drv->format_name);
2404 }
2405}
2406
ths5fafdf22007-09-16 21:08:06 +00002407void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002408 void *opaque)
2409{
2410 BlockDriver *drv;
2411
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002412 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002413 it(opaque, drv->format_name);
2414 }
2415}
2416
bellardb3380822004-03-14 21:38:54 +00002417BlockDriverState *bdrv_find(const char *name)
2418{
2419 BlockDriverState *bs;
2420
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002421 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2422 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002423 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002424 }
bellardb3380822004-03-14 21:38:54 +00002425 }
2426 return NULL;
2427}
2428
Markus Armbruster2f399b02010-06-02 18:55:20 +02002429BlockDriverState *bdrv_next(BlockDriverState *bs)
2430{
2431 if (!bs) {
2432 return QTAILQ_FIRST(&bdrv_states);
2433 }
2434 return QTAILQ_NEXT(bs, list);
2435}
2436
aliguori51de9762009-03-05 23:00:43 +00002437void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002438{
2439 BlockDriverState *bs;
2440
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002441 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002442 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002443 }
2444}
2445
bellardea2384d2004-08-01 21:59:26 +00002446const char *bdrv_get_device_name(BlockDriverState *bs)
2447{
2448 return bs->device_name;
2449}
2450
aliguoric6ca28d2008-10-06 13:55:43 +00002451void bdrv_flush_all(void)
2452{
2453 BlockDriverState *bs;
2454
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002455 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01002456 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002457 }
aliguoric6ca28d2008-10-06 13:55:43 +00002458}
2459
Kevin Wolff2feebb2010-04-14 17:30:35 +02002460int bdrv_has_zero_init(BlockDriverState *bs)
2461{
2462 assert(bs->drv);
2463
Kevin Wolf336c1c12010-07-28 11:26:29 +02002464 if (bs->drv->bdrv_has_zero_init) {
2465 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002466 }
2467
2468 return 1;
2469}
2470
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002471typedef struct BdrvCoIsAllocatedData {
2472 BlockDriverState *bs;
2473 int64_t sector_num;
2474 int nb_sectors;
2475 int *pnum;
2476 int ret;
2477 bool done;
2478} BdrvCoIsAllocatedData;
2479
thsf58c7b32008-06-05 21:53:49 +00002480/*
2481 * Returns true iff the specified sector is present in the disk image. Drivers
2482 * not implementing the functionality are assumed to not support backing files,
2483 * hence all their sectors are reported as allocated.
2484 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002485 * If 'sector_num' is beyond the end of the disk image the return value is 0
2486 * and 'pnum' is set to 0.
2487 *
thsf58c7b32008-06-05 21:53:49 +00002488 * 'pnum' is set to the number of sectors (including and immediately following
2489 * the specified sector) that are known to be in the same
2490 * allocated/unallocated state.
2491 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002492 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2493 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002494 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002495int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2496 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002497{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002498 int64_t n;
2499
2500 if (sector_num >= bs->total_sectors) {
2501 *pnum = 0;
2502 return 0;
2503 }
2504
2505 n = bs->total_sectors - sector_num;
2506 if (n < nb_sectors) {
2507 nb_sectors = n;
2508 }
2509
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002510 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002511 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002512 return 1;
2513 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002514
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002515 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2516}
2517
2518/* Coroutine wrapper for bdrv_is_allocated() */
2519static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2520{
2521 BdrvCoIsAllocatedData *data = opaque;
2522 BlockDriverState *bs = data->bs;
2523
2524 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2525 data->pnum);
2526 data->done = true;
2527}
2528
2529/*
2530 * Synchronous wrapper around bdrv_co_is_allocated().
2531 *
2532 * See bdrv_co_is_allocated() for details.
2533 */
2534int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2535 int *pnum)
2536{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002537 Coroutine *co;
2538 BdrvCoIsAllocatedData data = {
2539 .bs = bs,
2540 .sector_num = sector_num,
2541 .nb_sectors = nb_sectors,
2542 .pnum = pnum,
2543 .done = false,
2544 };
2545
2546 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2547 qemu_coroutine_enter(co, &data);
2548 while (!data.done) {
2549 qemu_aio_wait();
2550 }
2551 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002552}
2553
Luiz Capitulinob2023812011-09-21 17:16:47 -03002554BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002555{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002556 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002557 BlockDriverState *bs;
2558
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002559 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002560 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002561
Luiz Capitulinob2023812011-09-21 17:16:47 -03002562 info->value = g_malloc0(sizeof(*info->value));
2563 info->value->device = g_strdup(bs->device_name);
2564 info->value->type = g_strdup("unknown");
2565 info->value->locked = bdrv_dev_is_medium_locked(bs);
2566 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002567
Markus Armbrustere4def802011-09-06 18:58:53 +02002568 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002569 info->value->has_tray_open = true;
2570 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002571 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002572
2573 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002574 info->value->has_io_status = true;
2575 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002576 }
2577
bellard19cb3732006-08-19 11:45:59 +00002578 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002579 info->value->has_inserted = true;
2580 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2581 info->value->inserted->file = g_strdup(bs->filename);
2582 info->value->inserted->ro = bs->read_only;
2583 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2584 info->value->inserted->encrypted = bs->encrypted;
2585 if (bs->backing_file[0]) {
2586 info->value->inserted->has_backing_file = true;
2587 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002588 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002589
2590 if (bs->io_limits_enabled) {
2591 info->value->inserted->bps =
2592 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2593 info->value->inserted->bps_rd =
2594 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2595 info->value->inserted->bps_wr =
2596 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2597 info->value->inserted->iops =
2598 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2599 info->value->inserted->iops_rd =
2600 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2601 info->value->inserted->iops_wr =
2602 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2603 }
bellardb3380822004-03-14 21:38:54 +00002604 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002605
2606 /* XXX: waiting for the qapi to support GSList */
2607 if (!cur_item) {
2608 head = cur_item = info;
2609 } else {
2610 cur_item->next = info;
2611 cur_item = info;
2612 }
bellardb3380822004-03-14 21:38:54 +00002613 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002614
Luiz Capitulinob2023812011-09-21 17:16:47 -03002615 return head;
bellardb3380822004-03-14 21:38:54 +00002616}
thsa36e69d2007-12-02 05:18:19 +00002617
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002618/* Consider exposing this as a full fledged QMP command */
2619static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002620{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002621 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002622
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002623 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002624
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002625 if (bs->device_name[0]) {
2626 s->has_device = true;
2627 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002628 }
2629
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002630 s->stats = g_malloc0(sizeof(*s->stats));
2631 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2632 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2633 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2634 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2635 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2636 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2637 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2638 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2639 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2640
Kevin Wolf294cc352010-04-28 14:34:01 +02002641 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002642 s->has_parent = true;
2643 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002644 }
2645
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002646 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002647}
2648
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002649BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002650{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002651 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002652 BlockDriverState *bs;
2653
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002654 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002655 BlockStatsList *info = g_malloc0(sizeof(*info));
2656 info->value = qmp_query_blockstat(bs, NULL);
2657
2658 /* XXX: waiting for the qapi to support GSList */
2659 if (!cur_item) {
2660 head = cur_item = info;
2661 } else {
2662 cur_item->next = info;
2663 cur_item = info;
2664 }
thsa36e69d2007-12-02 05:18:19 +00002665 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002666
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002667 return head;
thsa36e69d2007-12-02 05:18:19 +00002668}
bellardea2384d2004-08-01 21:59:26 +00002669
aliguori045df332009-03-05 23:00:48 +00002670const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2671{
2672 if (bs->backing_hd && bs->backing_hd->encrypted)
2673 return bs->backing_file;
2674 else if (bs->encrypted)
2675 return bs->filename;
2676 else
2677 return NULL;
2678}
2679
ths5fafdf22007-09-16 21:08:06 +00002680void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002681 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002682{
Kevin Wolf3574c602011-10-26 11:02:11 +02002683 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002684}
2685
ths5fafdf22007-09-16 21:08:06 +00002686int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002687 const uint8_t *buf, int nb_sectors)
2688{
2689 BlockDriver *drv = bs->drv;
2690 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002691 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002692 if (!drv->bdrv_write_compressed)
2693 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002694 if (bdrv_check_request(bs, sector_num, nb_sectors))
2695 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002696
Jan Kiszkac6d22832009-11-30 18:21:20 +01002697 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002698 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2699 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002700
bellardfaea38e2006-08-05 21:31:00 +00002701 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2702}
ths3b46e622007-09-17 08:09:54 +00002703
bellardfaea38e2006-08-05 21:31:00 +00002704int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2705{
2706 BlockDriver *drv = bs->drv;
2707 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002708 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002709 if (!drv->bdrv_get_info)
2710 return -ENOTSUP;
2711 memset(bdi, 0, sizeof(*bdi));
2712 return drv->bdrv_get_info(bs, bdi);
2713}
2714
Christoph Hellwig45566e92009-07-10 23:11:57 +02002715int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2716 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002717{
2718 BlockDriver *drv = bs->drv;
2719 if (!drv)
2720 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002721 if (drv->bdrv_save_vmstate)
2722 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2723 if (bs->file)
2724 return bdrv_save_vmstate(bs->file, buf, pos, size);
2725 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002726}
2727
Christoph Hellwig45566e92009-07-10 23:11:57 +02002728int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2729 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002730{
2731 BlockDriver *drv = bs->drv;
2732 if (!drv)
2733 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002734 if (drv->bdrv_load_vmstate)
2735 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2736 if (bs->file)
2737 return bdrv_load_vmstate(bs->file, buf, pos, size);
2738 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002739}
2740
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002741void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2742{
2743 BlockDriver *drv = bs->drv;
2744
2745 if (!drv || !drv->bdrv_debug_event) {
2746 return;
2747 }
2748
2749 return drv->bdrv_debug_event(bs, event);
2750
2751}
2752
bellardfaea38e2006-08-05 21:31:00 +00002753/**************************************************************/
2754/* handling of snapshots */
2755
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002756int bdrv_can_snapshot(BlockDriverState *bs)
2757{
2758 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002759 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002760 return 0;
2761 }
2762
2763 if (!drv->bdrv_snapshot_create) {
2764 if (bs->file != NULL) {
2765 return bdrv_can_snapshot(bs->file);
2766 }
2767 return 0;
2768 }
2769
2770 return 1;
2771}
2772
Blue Swirl199630b2010-07-25 20:49:34 +00002773int bdrv_is_snapshot(BlockDriverState *bs)
2774{
2775 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2776}
2777
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002778BlockDriverState *bdrv_snapshots(void)
2779{
2780 BlockDriverState *bs;
2781
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002782 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002783 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002784 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002785
2786 bs = NULL;
2787 while ((bs = bdrv_next(bs))) {
2788 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002789 bs_snapshots = bs;
2790 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002791 }
2792 }
2793 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002794}
2795
ths5fafdf22007-09-16 21:08:06 +00002796int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002797 QEMUSnapshotInfo *sn_info)
2798{
2799 BlockDriver *drv = bs->drv;
2800 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002801 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002802 if (drv->bdrv_snapshot_create)
2803 return drv->bdrv_snapshot_create(bs, sn_info);
2804 if (bs->file)
2805 return bdrv_snapshot_create(bs->file, sn_info);
2806 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002807}
2808
ths5fafdf22007-09-16 21:08:06 +00002809int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002810 const char *snapshot_id)
2811{
2812 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002813 int ret, open_ret;
2814
bellardfaea38e2006-08-05 21:31:00 +00002815 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002816 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002817 if (drv->bdrv_snapshot_goto)
2818 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2819
2820 if (bs->file) {
2821 drv->bdrv_close(bs);
2822 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2823 open_ret = drv->bdrv_open(bs, bs->open_flags);
2824 if (open_ret < 0) {
2825 bdrv_delete(bs->file);
2826 bs->drv = NULL;
2827 return open_ret;
2828 }
2829 return ret;
2830 }
2831
2832 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002833}
2834
2835int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2836{
2837 BlockDriver *drv = bs->drv;
2838 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002839 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002840 if (drv->bdrv_snapshot_delete)
2841 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2842 if (bs->file)
2843 return bdrv_snapshot_delete(bs->file, snapshot_id);
2844 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002845}
2846
ths5fafdf22007-09-16 21:08:06 +00002847int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002848 QEMUSnapshotInfo **psn_info)
2849{
2850 BlockDriver *drv = bs->drv;
2851 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002852 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002853 if (drv->bdrv_snapshot_list)
2854 return drv->bdrv_snapshot_list(bs, psn_info);
2855 if (bs->file)
2856 return bdrv_snapshot_list(bs->file, psn_info);
2857 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002858}
2859
edison51ef6722010-09-21 19:58:41 -07002860int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2861 const char *snapshot_name)
2862{
2863 BlockDriver *drv = bs->drv;
2864 if (!drv) {
2865 return -ENOMEDIUM;
2866 }
2867 if (!bs->read_only) {
2868 return -EINVAL;
2869 }
2870 if (drv->bdrv_snapshot_load_tmp) {
2871 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2872 }
2873 return -ENOTSUP;
2874}
2875
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00002876BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2877 const char *backing_file)
2878{
2879 if (!bs->drv) {
2880 return NULL;
2881 }
2882
2883 if (bs->backing_hd) {
2884 if (strcmp(bs->backing_file, backing_file) == 0) {
2885 return bs->backing_hd;
2886 } else {
2887 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2888 }
2889 }
2890
2891 return NULL;
2892}
2893
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes) in a short human readable
 * form and return @buf.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal is printed
 * while the scaled value is below 10, a rounded integer otherwise. Values
 * too large for the other units are expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
    }

    return buf;
}
2923
2924char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2925{
2926 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002927#ifdef _WIN32
2928 struct tm *ptm;
2929#else
bellardfaea38e2006-08-05 21:31:00 +00002930 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002931#endif
bellardfaea38e2006-08-05 21:31:00 +00002932 time_t ti;
2933 int64_t secs;
2934
2935 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002936 snprintf(buf, buf_size,
2937 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002938 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2939 } else {
2940 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002941#ifdef _WIN32
2942 ptm = localtime(&ti);
2943 strftime(date_buf, sizeof(date_buf),
2944 "%Y-%m-%d %H:%M:%S", ptm);
2945#else
bellardfaea38e2006-08-05 21:31:00 +00002946 localtime_r(&ti, &tm);
2947 strftime(date_buf, sizeof(date_buf),
2948 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002949#endif
bellardfaea38e2006-08-05 21:31:00 +00002950 secs = sn->vm_clock_nsec / 1000000000;
2951 snprintf(clock_buf, sizeof(clock_buf),
2952 "%02d:%02d:%02d.%03d",
2953 (int)(secs / 3600),
2954 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002955 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002956 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2957 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002958 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002959 sn->id_str, sn->name,
2960 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2961 date_buf,
2962 clock_buf);
2963 }
2964 return buf;
2965}
2966
bellard83f64092006-08-01 16:21:11 +00002967/**************************************************************/
2968/* async I/Os */
2969
aliguori3b69e4b2009-01-22 16:59:24 +00002970BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002971 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002972 BlockDriverCompletionFunc *cb, void *opaque)
2973{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002974 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2975
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002976 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002977 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002978}
2979
aliguorif141eaf2009-04-07 18:43:24 +00002980BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2981 QEMUIOVector *qiov, int nb_sectors,
2982 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002983{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002984 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2985
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002986 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002987 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002988}
2989
Kevin Wolf40b4f532009-09-09 17:53:37 +02002990
2991typedef struct MultiwriteCB {
2992 int error;
2993 int num_requests;
2994 int num_callbacks;
2995 struct {
2996 BlockDriverCompletionFunc *cb;
2997 void *opaque;
2998 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002999 } callbacks[];
3000} MultiwriteCB;
3001
3002static void multiwrite_user_cb(MultiwriteCB *mcb)
3003{
3004 int i;
3005
3006 for (i = 0; i < mcb->num_callbacks; i++) {
3007 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01003008 if (mcb->callbacks[i].free_qiov) {
3009 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
3010 }
Anthony Liguori7267c092011-08-20 22:09:37 -05003011 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003012 }
3013}
3014
3015static void multiwrite_cb(void *opaque, int ret)
3016{
3017 MultiwriteCB *mcb = opaque;
3018
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003019 trace_multiwrite_cb(mcb, ret);
3020
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02003021 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02003022 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003023 }
3024
3025 mcb->num_requests--;
3026 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02003027 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05003028 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003029 }
3030}
3031
3032static int multiwrite_req_compare(const void *a, const void *b)
3033{
Christoph Hellwig77be4362010-05-19 20:53:10 +02003034 const BlockRequest *req1 = a, *req2 = b;
3035
3036 /*
3037 * Note that we can't simply subtract req2->sector from req1->sector
3038 * here as that could overflow the return value.
3039 */
3040 if (req1->sector > req2->sector) {
3041 return 1;
3042 } else if (req1->sector < req2->sector) {
3043 return -1;
3044 } else {
3045 return 0;
3046 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02003047}
3048
3049/*
3050 * Takes a bunch of requests and tries to merge them. Returns the number of
3051 * requests that remain after merging.
3052 */
3053static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
3054 int num_reqs, MultiwriteCB *mcb)
3055{
3056 int i, outidx;
3057
3058 // Sort requests by start sector
3059 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
3060
3061 // Check if adjacent requests touch the same clusters. If so, combine them,
3062 // filling up gaps with zero sectors.
3063 outidx = 0;
3064 for (i = 1; i < num_reqs; i++) {
3065 int merge = 0;
3066 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
3067
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003068 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02003069 if (reqs[i].sector <= oldreq_last) {
3070 merge = 1;
3071 }
3072
Christoph Hellwige2a305f2010-01-26 14:49:08 +01003073 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3074 merge = 0;
3075 }
3076
Kevin Wolf40b4f532009-09-09 17:53:37 +02003077 if (merge) {
3078 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05003079 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003080 qemu_iovec_init(qiov,
3081 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
3082
3083 // Add the first request to the merged one. If the requests are
3084 // overlapping, drop the last sectors of the first request.
3085 size = (reqs[i].sector - reqs[outidx].sector) << 9;
3086 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
3087
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003088 // We should need to add any zeros between the two requests
3089 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003090
3091 // Add the second request
3092 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
3093
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02003094 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003095 reqs[outidx].qiov = qiov;
3096
3097 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3098 } else {
3099 outidx++;
3100 reqs[outidx].sector = reqs[i].sector;
3101 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3102 reqs[outidx].qiov = reqs[i].qiov;
3103 }
3104 }
3105
3106 return outidx + 1;
3107}
3108
3109/*
3110 * Submit multiple AIO write requests at once.
3111 *
3112 * On success, the function returns 0 and all requests in the reqs array have
3113 * been submitted. In error case this function returns -1, and any of the
3114 * requests may or may not be submitted yet. In particular, this means that the
3115 * callback will be called for some of the requests, for others it won't. The
3116 * caller must check the error field of the BlockRequest to wait for the right
3117 * callbacks (if error != 0, no callback will be called).
3118 *
3119 * The implementation may modify the contents of the reqs array, e.g. to merge
3120 * requests. However, the fields opaque and error are left unmodified as they
3121 * are used to signal failure for a single request to the caller.
3122 */
3123int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3124{
Kevin Wolf40b4f532009-09-09 17:53:37 +02003125 MultiwriteCB *mcb;
3126 int i;
3127
Ryan Harper301db7c2011-03-07 10:01:04 -06003128 /* don't submit writes if we don't have a medium */
3129 if (bs->drv == NULL) {
3130 for (i = 0; i < num_reqs; i++) {
3131 reqs[i].error = -ENOMEDIUM;
3132 }
3133 return -1;
3134 }
3135
Kevin Wolf40b4f532009-09-09 17:53:37 +02003136 if (num_reqs == 0) {
3137 return 0;
3138 }
3139
3140 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05003141 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003142 mcb->num_requests = 0;
3143 mcb->num_callbacks = num_reqs;
3144
3145 for (i = 0; i < num_reqs; i++) {
3146 mcb->callbacks[i].cb = reqs[i].cb;
3147 mcb->callbacks[i].opaque = reqs[i].opaque;
3148 }
3149
3150 // Check for mergable requests
3151 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3152
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003153 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3154
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01003155 /* Run the aio requests. */
3156 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003157 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01003158 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02003159 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003160 }
3161
3162 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003163}
3164
bellard83f64092006-08-01 16:21:11 +00003165void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00003166{
aliguori6bbff9a2009-03-20 18:25:59 +00003167 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00003168}
3169
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08003170/* block I/O throttling */
3171static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3172 bool is_write, double elapsed_time, uint64_t *wait)
3173{
3174 uint64_t bps_limit = 0;
3175 double bytes_limit, bytes_base, bytes_res;
3176 double slice_time, wait_time;
3177
3178 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3179 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3180 } else if (bs->io_limits.bps[is_write]) {
3181 bps_limit = bs->io_limits.bps[is_write];
3182 } else {
3183 if (wait) {
3184 *wait = 0;
3185 }
3186
3187 return false;
3188 }
3189
3190 slice_time = bs->slice_end - bs->slice_start;
3191 slice_time /= (NANOSECONDS_PER_SECOND);
3192 bytes_limit = bps_limit * slice_time;
3193 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3194 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3195 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3196 }
3197
3198 /* bytes_base: the bytes of data which have been read/written; and
3199 * it is obtained from the history statistic info.
3200 * bytes_res: the remaining bytes of data which need to be read/written.
3201 * (bytes_base + bytes_res) / bps_limit: used to calcuate
3202 * the total time for completing reading/writting all data.
3203 */
3204 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3205
3206 if (bytes_base + bytes_res <= bytes_limit) {
3207 if (wait) {
3208 *wait = 0;
3209 }
3210
3211 return false;
3212 }
3213
3214 /* Calc approx time to dispatch */
3215 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3216
3217 /* When the I/O rate at runtime exceeds the limits,
3218 * bs->slice_end need to be extended in order that the current statistic
3219 * info can be kept until the timer fire, so it is increased and tuned
3220 * based on the result of experiment.
3221 */
3222 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3223 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3224 if (wait) {
3225 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3226 }
3227
3228 return true;
3229}
3230
3231static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3232 double elapsed_time, uint64_t *wait)
3233{
3234 uint64_t iops_limit = 0;
3235 double ios_limit, ios_base;
3236 double slice_time, wait_time;
3237
3238 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3239 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3240 } else if (bs->io_limits.iops[is_write]) {
3241 iops_limit = bs->io_limits.iops[is_write];
3242 } else {
3243 if (wait) {
3244 *wait = 0;
3245 }
3246
3247 return false;
3248 }
3249
3250 slice_time = bs->slice_end - bs->slice_start;
3251 slice_time /= (NANOSECONDS_PER_SECOND);
3252 ios_limit = iops_limit * slice_time;
3253 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3254 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3255 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3256 }
3257
3258 if (ios_base + 1 <= ios_limit) {
3259 if (wait) {
3260 *wait = 0;
3261 }
3262
3263 return false;
3264 }
3265
3266 /* Calc approx time to dispatch */
3267 wait_time = (ios_base + 1) / iops_limit;
3268 if (wait_time > elapsed_time) {
3269 wait_time = wait_time - elapsed_time;
3270 } else {
3271 wait_time = 0;
3272 }
3273
3274 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3275 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3276 if (wait) {
3277 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3278 }
3279
3280 return true;
3281}
3282
3283static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3284 bool is_write, int64_t *wait)
3285{
3286 int64_t now, max_wait;
3287 uint64_t bps_wait = 0, iops_wait = 0;
3288 double elapsed_time;
3289 int bps_ret, iops_ret;
3290
3291 now = qemu_get_clock_ns(vm_clock);
3292 if ((bs->slice_start < now)
3293 && (bs->slice_end > now)) {
3294 bs->slice_end = now + bs->slice_time;
3295 } else {
3296 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3297 bs->slice_start = now;
3298 bs->slice_end = now + bs->slice_time;
3299
3300 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3301 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3302
3303 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3304 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3305 }
3306
3307 elapsed_time = now - bs->slice_start;
3308 elapsed_time /= (NANOSECONDS_PER_SECOND);
3309
3310 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3311 is_write, elapsed_time, &bps_wait);
3312 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3313 elapsed_time, &iops_wait);
3314 if (bps_ret || iops_ret) {
3315 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3316 if (wait) {
3317 *wait = max_wait;
3318 }
3319
3320 now = qemu_get_clock_ns(vm_clock);
3321 if (bs->slice_end < now + max_wait) {
3322 bs->slice_end = now + max_wait;
3323 }
3324
3325 return true;
3326 }
3327
3328 if (wait) {
3329 *wait = 0;
3330 }
3331
3332 return false;
3333}
pbrookce1a14d2006-08-07 02:38:06 +00003334
bellard83f64092006-08-01 16:21:11 +00003335/**************************************************************/
3336/* async block device emulation */
3337
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003338typedef struct BlockDriverAIOCBSync {
3339 BlockDriverAIOCB common;
3340 QEMUBH *bh;
3341 int ret;
3342 /* vector translation state */
3343 QEMUIOVector *qiov;
3344 uint8_t *bounce;
3345 int is_write;
3346} BlockDriverAIOCBSync;
3347
3348static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3349{
Kevin Wolfb666d232010-05-05 11:44:39 +02003350 BlockDriverAIOCBSync *acb =
3351 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003352 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003353 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003354 qemu_aio_release(acb);
3355}
3356
3357static AIOPool bdrv_em_aio_pool = {
3358 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3359 .cancel = bdrv_aio_cancel_em,
3360};
3361
bellard83f64092006-08-01 16:21:11 +00003362static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003363{
pbrookce1a14d2006-08-07 02:38:06 +00003364 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003365
aliguorif141eaf2009-04-07 18:43:24 +00003366 if (!acb->is_write)
3367 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003368 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003369 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003370 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003371 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003372 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003373}
bellardbeac80c2006-06-26 20:08:57 +00003374
aliguorif141eaf2009-04-07 18:43:24 +00003375static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3376 int64_t sector_num,
3377 QEMUIOVector *qiov,
3378 int nb_sectors,
3379 BlockDriverCompletionFunc *cb,
3380 void *opaque,
3381 int is_write)
3382
bellardea2384d2004-08-01 21:59:26 +00003383{
pbrookce1a14d2006-08-07 02:38:06 +00003384 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003385
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003386 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003387 acb->is_write = is_write;
3388 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003389 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003390 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003391
3392 if (is_write) {
3393 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003394 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003395 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003396 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003397 }
3398
pbrookce1a14d2006-08-07 02:38:06 +00003399 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003400
pbrookce1a14d2006-08-07 02:38:06 +00003401 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003402}
3403
aliguorif141eaf2009-04-07 18:43:24 +00003404static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3405 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003406 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003407{
aliguorif141eaf2009-04-07 18:43:24 +00003408 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003409}
3410
aliguorif141eaf2009-04-07 18:43:24 +00003411static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3412 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3413 BlockDriverCompletionFunc *cb, void *opaque)
3414{
3415 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3416}
3417
Kevin Wolf68485422011-06-30 10:05:46 +02003418
3419typedef struct BlockDriverAIOCBCoroutine {
3420 BlockDriverAIOCB common;
3421 BlockRequest req;
3422 bool is_write;
3423 QEMUBH* bh;
3424} BlockDriverAIOCBCoroutine;
3425
3426static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3427{
3428 qemu_aio_flush();
3429}
3430
3431static AIOPool bdrv_em_co_aio_pool = {
3432 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3433 .cancel = bdrv_aio_co_cancel_em,
3434};
3435
Paolo Bonzini35246a62011-10-14 10:41:29 +02003436static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003437{
3438 BlockDriverAIOCBCoroutine *acb = opaque;
3439
3440 acb->common.cb(acb->common.opaque, acb->req.error);
3441 qemu_bh_delete(acb->bh);
3442 qemu_aio_release(acb);
3443}
3444
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003445/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3446static void coroutine_fn bdrv_co_do_rw(void *opaque)
3447{
3448 BlockDriverAIOCBCoroutine *acb = opaque;
3449 BlockDriverState *bs = acb->common.bs;
3450
3451 if (!acb->is_write) {
3452 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003453 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003454 } else {
3455 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003456 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003457 }
3458
Paolo Bonzini35246a62011-10-14 10:41:29 +02003459 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003460 qemu_bh_schedule(acb->bh);
3461}
3462
Kevin Wolf68485422011-06-30 10:05:46 +02003463static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3464 int64_t sector_num,
3465 QEMUIOVector *qiov,
3466 int nb_sectors,
3467 BlockDriverCompletionFunc *cb,
3468 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003469 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003470{
3471 Coroutine *co;
3472 BlockDriverAIOCBCoroutine *acb;
3473
3474 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3475 acb->req.sector = sector_num;
3476 acb->req.nb_sectors = nb_sectors;
3477 acb->req.qiov = qiov;
3478 acb->is_write = is_write;
3479
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003480 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003481 qemu_coroutine_enter(co, acb);
3482
3483 return &acb->common;
3484}
3485
Paolo Bonzini07f07612011-10-17 12:32:12 +02003486static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003487{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003488 BlockDriverAIOCBCoroutine *acb = opaque;
3489 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003490
Paolo Bonzini07f07612011-10-17 12:32:12 +02003491 acb->req.error = bdrv_co_flush(bs);
3492 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003493 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003494}
3495
Paolo Bonzini07f07612011-10-17 12:32:12 +02003496BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003497 BlockDriverCompletionFunc *cb, void *opaque)
3498{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003499 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003500
Paolo Bonzini07f07612011-10-17 12:32:12 +02003501 Coroutine *co;
3502 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003503
Paolo Bonzini07f07612011-10-17 12:32:12 +02003504 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3505 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3506 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003507
Alexander Graf016f5cf2010-05-26 17:51:49 +02003508 return &acb->common;
3509}
3510
Paolo Bonzini4265d622011-10-17 12:32:14 +02003511static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3512{
3513 BlockDriverAIOCBCoroutine *acb = opaque;
3514 BlockDriverState *bs = acb->common.bs;
3515
3516 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3517 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3518 qemu_bh_schedule(acb->bh);
3519}
3520
3521BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3522 int64_t sector_num, int nb_sectors,
3523 BlockDriverCompletionFunc *cb, void *opaque)
3524{
3525 Coroutine *co;
3526 BlockDriverAIOCBCoroutine *acb;
3527
3528 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3529
3530 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3531 acb->req.sector = sector_num;
3532 acb->req.nb_sectors = nb_sectors;
3533 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3534 qemu_coroutine_enter(co, acb);
3535
3536 return &acb->common;
3537}
3538
bellardea2384d2004-08-01 21:59:26 +00003539void bdrv_init(void)
3540{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003541 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003542}
pbrookce1a14d2006-08-07 02:38:06 +00003543
Markus Armbrustereb852012009-10-27 18:41:44 +01003544void bdrv_init_with_whitelist(void)
3545{
3546 use_bdrv_whitelist = 1;
3547 bdrv_init();
3548}
3549
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003550void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3551 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003552{
pbrookce1a14d2006-08-07 02:38:06 +00003553 BlockDriverAIOCB *acb;
3554
aliguori6bbff9a2009-03-20 18:25:59 +00003555 if (pool->free_aiocb) {
3556 acb = pool->free_aiocb;
3557 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003558 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003559 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003560 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003561 }
3562 acb->bs = bs;
3563 acb->cb = cb;
3564 acb->opaque = opaque;
3565 return acb;
3566}
3567
3568void qemu_aio_release(void *p)
3569{
aliguori6bbff9a2009-03-20 18:25:59 +00003570 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3571 AIOPool *pool = acb->pool;
3572 acb->next = pool->free_aiocb;
3573 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003574}
bellard19cb3732006-08-19 11:45:59 +00003575
3576/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003577/* Coroutine block device emulation */
3578
3579typedef struct CoroutineIOCompletion {
3580 Coroutine *coroutine;
3581 int ret;
3582} CoroutineIOCompletion;
3583
3584static void bdrv_co_io_em_complete(void *opaque, int ret)
3585{
3586 CoroutineIOCompletion *co = opaque;
3587
3588 co->ret = ret;
3589 qemu_coroutine_enter(co->coroutine, NULL);
3590}
3591
3592static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3593 int nb_sectors, QEMUIOVector *iov,
3594 bool is_write)
3595{
3596 CoroutineIOCompletion co = {
3597 .coroutine = qemu_coroutine_self(),
3598 };
3599 BlockDriverAIOCB *acb;
3600
3601 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003602 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3603 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003604 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003605 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3606 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003607 }
3608
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003609 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003610 if (!acb) {
3611 return -EIO;
3612 }
3613 qemu_coroutine_yield();
3614
3615 return co.ret;
3616}
3617
3618static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3619 int64_t sector_num, int nb_sectors,
3620 QEMUIOVector *iov)
3621{
3622 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3623}
3624
3625static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3626 int64_t sector_num, int nb_sectors,
3627 QEMUIOVector *iov)
3628{
3629 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3630}
3631
Paolo Bonzini07f07612011-10-17 12:32:12 +02003632static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003633{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003634 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003635
Paolo Bonzini07f07612011-10-17 12:32:12 +02003636 rwco->ret = bdrv_co_flush(rwco->bs);
3637}
3638
3639int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3640{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003641 int ret;
3642
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003643 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003644 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003645 }
3646
Kevin Wolfca716362011-11-10 18:13:59 +01003647 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003648 if (bs->drv->bdrv_co_flush_to_os) {
3649 ret = bs->drv->bdrv_co_flush_to_os(bs);
3650 if (ret < 0) {
3651 return ret;
3652 }
3653 }
3654
Kevin Wolfca716362011-11-10 18:13:59 +01003655 /* But don't actually force it to the disk with cache=unsafe */
3656 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3657 return 0;
3658 }
3659
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003660 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003661 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003662 } else if (bs->drv->bdrv_aio_flush) {
3663 BlockDriverAIOCB *acb;
3664 CoroutineIOCompletion co = {
3665 .coroutine = qemu_coroutine_self(),
3666 };
3667
3668 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3669 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003670 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003671 } else {
3672 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003673 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003674 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003675 } else {
3676 /*
3677 * Some block drivers always operate in either writethrough or unsafe
3678 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3679 * know how the server works (because the behaviour is hardcoded or
3680 * depends on server-side configuration), so we can't ensure that
3681 * everything is safe on disk. Returning an error doesn't work because
3682 * that would break guests even if the server operates in writethrough
3683 * mode.
3684 *
3685 * Let's hope the user knows what he's doing.
3686 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003687 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003688 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003689 if (ret < 0) {
3690 return ret;
3691 }
3692
3693 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3694 * in the case of cache=unsafe, so there are no useless flushes.
3695 */
3696 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003697}
3698
Anthony Liguori0f154232011-11-14 15:09:45 -06003699void bdrv_invalidate_cache(BlockDriverState *bs)
3700{
3701 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3702 bs->drv->bdrv_invalidate_cache(bs);
3703 }
3704}
3705
3706void bdrv_invalidate_cache_all(void)
3707{
3708 BlockDriverState *bs;
3709
3710 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3711 bdrv_invalidate_cache(bs);
3712 }
3713}
3714
Benoît Canet07789262012-03-23 08:36:49 +01003715void bdrv_clear_incoming_migration_all(void)
3716{
3717 BlockDriverState *bs;
3718
3719 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3720 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
3721 }
3722}
3723
Paolo Bonzini07f07612011-10-17 12:32:12 +02003724int bdrv_flush(BlockDriverState *bs)
3725{
3726 Coroutine *co;
3727 RwCo rwco = {
3728 .bs = bs,
3729 .ret = NOT_DONE,
3730 };
3731
3732 if (qemu_in_coroutine()) {
3733 /* Fast-path if already in coroutine context */
3734 bdrv_flush_co_entry(&rwco);
3735 } else {
3736 co = qemu_coroutine_create(bdrv_flush_co_entry);
3737 qemu_coroutine_enter(co, &rwco);
3738 while (rwco.ret == NOT_DONE) {
3739 qemu_aio_wait();
3740 }
3741 }
3742
3743 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003744}
3745
Paolo Bonzini4265d622011-10-17 12:32:14 +02003746static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3747{
3748 RwCo *rwco = opaque;
3749
3750 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3751}
3752
3753int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3754 int nb_sectors)
3755{
3756 if (!bs->drv) {
3757 return -ENOMEDIUM;
3758 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3759 return -EIO;
3760 } else if (bs->read_only) {
3761 return -EROFS;
3762 } else if (bs->drv->bdrv_co_discard) {
3763 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3764 } else if (bs->drv->bdrv_aio_discard) {
3765 BlockDriverAIOCB *acb;
3766 CoroutineIOCompletion co = {
3767 .coroutine = qemu_coroutine_self(),
3768 };
3769
3770 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3771 bdrv_co_io_em_complete, &co);
3772 if (acb == NULL) {
3773 return -EIO;
3774 } else {
3775 qemu_coroutine_yield();
3776 return co.ret;
3777 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003778 } else {
3779 return 0;
3780 }
3781}
3782
3783int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3784{
3785 Coroutine *co;
3786 RwCo rwco = {
3787 .bs = bs,
3788 .sector_num = sector_num,
3789 .nb_sectors = nb_sectors,
3790 .ret = NOT_DONE,
3791 };
3792
3793 if (qemu_in_coroutine()) {
3794 /* Fast-path if already in coroutine context */
3795 bdrv_discard_co_entry(&rwco);
3796 } else {
3797 co = qemu_coroutine_create(bdrv_discard_co_entry);
3798 qemu_coroutine_enter(co, &rwco);
3799 while (rwco.ret == NOT_DONE) {
3800 qemu_aio_wait();
3801 }
3802 }
3803
3804 return rwco.ret;
3805}
3806
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003807/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003808/* removable device support */
3809
3810/**
3811 * Return TRUE if the media is present
3812 */
3813int bdrv_is_inserted(BlockDriverState *bs)
3814{
3815 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003816
bellard19cb3732006-08-19 11:45:59 +00003817 if (!drv)
3818 return 0;
3819 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003820 return 1;
3821 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003822}
3823
3824/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003825 * Return whether the media changed since the last call to this
3826 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003827 */
3828int bdrv_media_changed(BlockDriverState *bs)
3829{
3830 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003831
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003832 if (drv && drv->bdrv_media_changed) {
3833 return drv->bdrv_media_changed(bs);
3834 }
3835 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003836}
3837
3838/**
3839 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3840 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02003841void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003842{
3843 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003844
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003845 if (drv && drv->bdrv_eject) {
3846 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003847 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02003848
3849 if (bs->device_name[0] != '\0') {
3850 bdrv_emit_qmp_eject_event(bs, eject_flag);
3851 }
bellard19cb3732006-08-19 11:45:59 +00003852}
3853
bellard19cb3732006-08-19 11:45:59 +00003854/**
3855 * Lock or unlock the media (if it is locked, the user won't be able
3856 * to eject it manually).
3857 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003858void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003859{
3860 BlockDriver *drv = bs->drv;
3861
Markus Armbruster025e8492011-09-06 18:58:47 +02003862 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003863
Markus Armbruster025e8492011-09-06 18:58:47 +02003864 if (drv && drv->bdrv_lock_medium) {
3865 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003866 }
3867}
ths985a03b2007-12-24 16:10:43 +00003868
3869/* needed for generic scsi interface */
3870
3871int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3872{
3873 BlockDriver *drv = bs->drv;
3874
3875 if (drv && drv->bdrv_ioctl)
3876 return drv->bdrv_ioctl(bs, req, buf);
3877 return -ENOTSUP;
3878}
aliguori7d780662009-03-12 19:57:08 +00003879
aliguori221f7152009-03-28 17:28:41 +00003880BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3881 unsigned long int req, void *buf,
3882 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003883{
aliguori221f7152009-03-28 17:28:41 +00003884 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003885
aliguori221f7152009-03-28 17:28:41 +00003886 if (drv && drv->bdrv_aio_ioctl)
3887 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3888 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003889}
aliguorie268ca52009-04-22 20:20:00 +00003890
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003891void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3892{
3893 bs->buffer_alignment = align;
3894}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003895
aliguorie268ca52009-04-22 20:20:00 +00003896void *qemu_blockalign(BlockDriverState *bs, size_t size)
3897{
3898 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3899}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003900
3901void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3902{
3903 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003904
Liran Schouraaa0eb72010-01-26 10:31:48 +02003905 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003906 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003907 if (!bs->dirty_bitmap) {
3908 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
Paolo Bonzini71df14f2012-04-12 14:01:04 +02003909 BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
3910 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003911
Paolo Bonzini71df14f2012-04-12 14:01:04 +02003912 bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003913 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003914 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003915 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003916 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003917 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003918 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003919 }
3920}
3921
3922int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3923{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003924 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003925
Jan Kiszkac6d22832009-11-30 18:21:20 +01003926 if (bs->dirty_bitmap &&
3927 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003928 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3929 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003930 } else {
3931 return 0;
3932 }
3933}
3934
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003935void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3936 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003937{
3938 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3939}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003940
3941int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3942{
3943 return bs->dirty_count;
3944}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003945
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003946void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3947{
3948 assert(bs->in_use != in_use);
3949 bs->in_use = in_use;
3950}
3951
3952int bdrv_in_use(BlockDriverState *bs)
3953{
3954 return bs->in_use;
3955}
3956
Luiz Capitulino28a72822011-09-26 17:43:50 -03003957void bdrv_iostatus_enable(BlockDriverState *bs)
3958{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003959 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003960 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003961}
3962
3963/* The I/O status is only enabled if the drive explicitly
3964 * enables it _and_ the VM is configured to stop on errors */
3965bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3966{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003967 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003968 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3969 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3970 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3971}
3972
3973void bdrv_iostatus_disable(BlockDriverState *bs)
3974{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003975 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003976}
3977
3978void bdrv_iostatus_reset(BlockDriverState *bs)
3979{
3980 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003981 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003982 }
3983}
3984
3985/* XXX: Today this is set by device models because it makes the implementation
3986 quite simple. However, the block layer knows about the error, so it's
3987 possible to implement this without device models being involved */
3988void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3989{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003990 if (bdrv_iostatus_is_enabled(bs) &&
3991 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003992 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003993 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3994 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003995 }
3996}
3997
Christoph Hellwiga597e792011-08-25 08:26:01 +02003998void
3999bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
4000 enum BlockAcctType type)
4001{
4002 assert(type < BDRV_MAX_IOTYPE);
4003
4004 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02004005 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02004006 cookie->type = type;
4007}
4008
4009void
4010bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
4011{
4012 assert(cookie->type < BDRV_MAX_IOTYPE);
4013
4014 bs->nr_bytes[cookie->type] += cookie->bytes;
4015 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02004016 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02004017}
4018
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004019int bdrv_img_create(const char *filename, const char *fmt,
4020 const char *base_filename, const char *base_fmt,
4021 char *options, uint64_t img_size, int flags)
4022{
4023 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02004024 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004025 BlockDriverState *bs = NULL;
4026 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004027 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004028 int ret = 0;
4029
4030 /* Find driver and parse its options */
4031 drv = bdrv_find_format(fmt);
4032 if (!drv) {
4033 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004034 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004035 goto out;
4036 }
4037
4038 proto_drv = bdrv_find_protocol(filename);
4039 if (!proto_drv) {
4040 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004041 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004042 goto out;
4043 }
4044
4045 create_options = append_option_parameters(create_options,
4046 drv->create_options);
4047 create_options = append_option_parameters(create_options,
4048 proto_drv->create_options);
4049
4050 /* Create parameter list with default values */
4051 param = parse_option_parameters("", create_options, param);
4052
4053 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4054
4055 /* Parse -o options */
4056 if (options) {
4057 param = parse_option_parameters(options, create_options, param);
4058 if (param == NULL) {
4059 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004060 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004061 goto out;
4062 }
4063 }
4064
4065 if (base_filename) {
4066 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4067 base_filename)) {
4068 error_report("Backing file not supported for file format '%s'",
4069 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004070 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004071 goto out;
4072 }
4073 }
4074
4075 if (base_fmt) {
4076 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4077 error_report("Backing file format not supported for file "
4078 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004079 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004080 goto out;
4081 }
4082 }
4083
Jes Sorensen792da932010-12-16 13:52:17 +01004084 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4085 if (backing_file && backing_file->value.s) {
4086 if (!strcmp(filename, backing_file->value.s)) {
4087 error_report("Error: Trying to create an image with the "
4088 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004089 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01004090 goto out;
4091 }
4092 }
4093
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004094 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4095 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004096 backing_drv = bdrv_find_format(backing_fmt->value.s);
4097 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004098 error_report("Unknown backing file format '%s'",
4099 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004100 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004101 goto out;
4102 }
4103 }
4104
4105 // The size for the image must always be specified, with one exception:
4106 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02004107 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4108 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004109 if (backing_file && backing_file->value.s) {
4110 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004111 char buf[32];
Paolo Bonzini63090da2012-04-12 14:01:03 +02004112 int back_flags;
4113
4114 /* backing files always opened read-only */
4115 back_flags =
4116 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004117
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004118 bs = bdrv_new("");
4119
Paolo Bonzini63090da2012-04-12 14:01:03 +02004120 ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004121 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004122 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004123 goto out;
4124 }
4125 bdrv_get_geometry(bs, &size);
4126 size *= 512;
4127
4128 snprintf(buf, sizeof(buf), "%" PRId64, size);
4129 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4130 } else {
4131 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004132 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004133 goto out;
4134 }
4135 }
4136
4137 printf("Formatting '%s', fmt=%s ", filename, fmt);
4138 print_option_parameters(param);
4139 puts("");
4140
4141 ret = bdrv_create(drv, filename, param);
4142
4143 if (ret < 0) {
4144 if (ret == -ENOTSUP) {
4145 error_report("Formatting or formatting option not supported for "
4146 "file format '%s'", fmt);
4147 } else if (ret == -EFBIG) {
4148 error_report("The image size is too large for file format '%s'",
4149 fmt);
4150 } else {
4151 error_report("%s: error while creating %s: %s", filename, fmt,
4152 strerror(-ret));
4153 }
4154 }
4155
4156out:
4157 free_option_parameters(create_options);
4158 free_option_parameters(param);
4159
4160 if (bs) {
4161 bdrv_delete(bs);
4162 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01004163
4164 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004165}
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004166
4167void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
Stefan Hajnoczic83c66c2012-04-25 16:51:03 +01004168 int64_t speed, BlockDriverCompletionFunc *cb,
4169 void *opaque, Error **errp)
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004170{
4171 BlockJob *job;
4172
4173 if (bs->job || bdrv_in_use(bs)) {
Stefan Hajnoczifd7f8c62012-04-25 16:51:00 +01004174 error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004175 return NULL;
4176 }
4177 bdrv_set_in_use(bs, 1);
4178
4179 job = g_malloc0(job_type->instance_size);
4180 job->job_type = job_type;
4181 job->bs = bs;
4182 job->cb = cb;
4183 job->opaque = opaque;
Paolo Bonzini4513eaf2012-05-08 16:51:45 +02004184 job->busy = true;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004185 bs->job = job;
Stefan Hajnoczic83c66c2012-04-25 16:51:03 +01004186
4187 /* Only set speed when necessary to avoid NotSupported error */
4188 if (speed != 0) {
4189 Error *local_err = NULL;
4190
4191 block_job_set_speed(job, speed, &local_err);
4192 if (error_is_set(&local_err)) {
4193 bs->job = NULL;
4194 g_free(job);
4195 bdrv_set_in_use(bs, 0);
4196 error_propagate(errp, local_err);
4197 return NULL;
4198 }
4199 }
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004200 return job;
4201}
4202
4203void block_job_complete(BlockJob *job, int ret)
4204{
4205 BlockDriverState *bs = job->bs;
4206
4207 assert(bs->job == job);
4208 job->cb(job->opaque, ret);
4209 bs->job = NULL;
4210 g_free(job);
4211 bdrv_set_in_use(bs, 0);
4212}
4213
Stefan Hajnoczi882ec7c2012-04-25 16:51:02 +01004214void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004215{
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004216 Error *local_err = NULL;
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004217
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004218 if (!job->job_type->set_speed) {
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004219 error_set(errp, QERR_NOT_SUPPORTED);
4220 return;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004221 }
Stefan Hajnoczi882ec7c2012-04-25 16:51:02 +01004222 job->job_type->set_speed(job, speed, &local_err);
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004223 if (error_is_set(&local_err)) {
4224 error_propagate(errp, local_err);
4225 return;
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004226 }
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004227
Stefan Hajnoczi882ec7c2012-04-25 16:51:02 +01004228 job->speed = speed;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004229}
4230
4231void block_job_cancel(BlockJob *job)
4232{
4233 job->cancelled = true;
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004234 if (job->co && !job->busy) {
4235 qemu_coroutine_enter(job->co, NULL);
4236 }
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004237}
4238
4239bool block_job_is_cancelled(BlockJob *job)
4240{
4241 return job->cancelled;
4242}
Paolo Bonzini3e914652012-03-30 13:17:11 +02004243
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004244struct BlockCancelData {
4245 BlockJob *job;
4246 BlockDriverCompletionFunc *cb;
4247 void *opaque;
4248 bool cancelled;
4249 int ret;
4250};
4251
4252static void block_job_cancel_cb(void *opaque, int ret)
Paolo Bonzini3e914652012-03-30 13:17:11 +02004253{
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004254 struct BlockCancelData *data = opaque;
4255
4256 data->cancelled = block_job_is_cancelled(data->job);
4257 data->ret = ret;
4258 data->cb(data->opaque, ret);
4259}
4260
4261int block_job_cancel_sync(BlockJob *job)
4262{
4263 struct BlockCancelData data;
Paolo Bonzini3e914652012-03-30 13:17:11 +02004264 BlockDriverState *bs = job->bs;
4265
4266 assert(bs->job == job);
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004267
4268 /* Set up our own callback to store the result and chain to
4269 * the original callback.
4270 */
4271 data.job = job;
4272 data.cb = job->cb;
4273 data.opaque = job->opaque;
4274 data.ret = -EINPROGRESS;
4275 job->cb = block_job_cancel_cb;
4276 job->opaque = &data;
Paolo Bonzini3e914652012-03-30 13:17:11 +02004277 block_job_cancel(job);
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004278 while (data.ret == -EINPROGRESS) {
Paolo Bonzini3e914652012-03-30 13:17:11 +02004279 qemu_aio_wait();
4280 }
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004281 return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
Paolo Bonzini3e914652012-03-30 13:17:11 +02004282}
Paolo Bonzini4513eaf2012-05-08 16:51:45 +02004283
4284void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
4285{
4286 /* Check cancellation *before* setting busy = false, too! */
4287 if (!block_job_is_cancelled(job)) {
4288 job->busy = false;
4289 co_sleep_ns(clock, ns);
4290 job->busy = true;
4291 }
4292}