blob: 29869987a9fd19de80fcade03062571339cdef36 [file] [log] [blame]
/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
/* Placeholder result value used while an emulated sync operation is still
 * in progress. */
#define NOT_DONE 0x7fffffff

/* Bit flags accepted by the internal bdrv_co_do_readv()/bdrv_co_do_writev()
 * helpers declared below. */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 1 << 0,
    BDRV_REQ_ZERO_WRITE   = 1 << 1,
} BdrvRequestFlags;
55
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020056static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000057static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000059 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000060static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000062 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020063static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010069static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000070 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010072static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000073 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010081 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010082static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +010083static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
84 int64_t sector_num, int nb_sectors);
bellardec530c82006-04-25 22:36:06 +000085
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080086static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
87 bool is_write, double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
89 double elapsed_time, uint64_t *wait);
90static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
91 bool is_write, int64_t *wait);
92
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010093static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
94 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000095
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010096static QLIST_HEAD(, BlockDriver) bdrv_drivers =
97 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000098
Markus Armbrusterf9092b12010-06-25 10:33:39 +020099/* The device to use for VM snapshots */
100static BlockDriverState *bs_snapshots;
101
Markus Armbrustereb852012009-10-27 18:41:44 +0100102/* If non-zero, use only whitelisted block drivers */
103static int use_bdrv_whitelist;
104
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000105#ifdef _WIN32
/* Return 1 if filename starts with an ASCII letter followed by ':'
 * (e.g. "c:" or "D:\dir"), 0 otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only inspected once filename[0] is known to be a
     * letter, so an empty string is never read past its terminator */
    return (is_lower || is_upper) && filename[1] == ':';
}
112
113int is_windows_drive(const char *filename)
114{
115 if (is_windows_drive_prefix(filename) &&
116 filename[2] == '\0')
117 return 1;
118 if (strstart(filename, "\\\\.\\", NULL) ||
119 strstart(filename, "//./", NULL))
120 return 1;
121 return 0;
122}
123#endif
124
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800125/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800126void bdrv_io_limits_disable(BlockDriverState *bs)
127{
128 bs->io_limits_enabled = false;
129
130 while (qemu_co_queue_next(&bs->throttled_reqs));
131
132 if (bs->block_timer) {
133 qemu_del_timer(bs->block_timer);
134 qemu_free_timer(bs->block_timer);
135 bs->block_timer = NULL;
136 }
137
138 bs->slice_start = 0;
139 bs->slice_end = 0;
140 bs->slice_time = 0;
141 memset(&bs->io_base, 0, sizeof(bs->io_base));
142}
143
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800144static void bdrv_block_timer(void *opaque)
145{
146 BlockDriverState *bs = opaque;
147
148 qemu_co_queue_next(&bs->throttled_reqs);
149}
150
151void bdrv_io_limits_enable(BlockDriverState *bs)
152{
153 qemu_co_queue_init(&bs->throttled_reqs);
154 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
155 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
156 bs->slice_start = qemu_get_clock_ns(vm_clock);
157 bs->slice_end = bs->slice_start + bs->slice_time;
158 memset(&bs->io_base, 0, sizeof(bs->io_base));
159 bs->io_limits_enabled = true;
160}
161
162bool bdrv_io_limits_enabled(BlockDriverState *bs)
163{
164 BlockIOLimit *io_limits = &bs->io_limits;
165 return io_limits->bps[BLOCK_IO_LIMIT_READ]
166 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
167 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
168 || io_limits->iops[BLOCK_IO_LIMIT_READ]
169 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
170 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
171}
172
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800173static void bdrv_io_limits_intercept(BlockDriverState *bs,
174 bool is_write, int nb_sectors)
175{
176 int64_t wait_time = -1;
177
178 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
179 qemu_co_queue_wait(&bs->throttled_reqs);
180 }
181
182 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
183 * throttled requests will not be dequeued until the current request is
184 * allowed to be serviced. So if the current request still exceeds the
185 * limits, it will be inserted to the head. All requests followed it will
186 * be still in throttled_reqs queue.
187 */
188
189 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
190 qemu_mod_timer(bs->block_timer,
191 wait_time + qemu_get_clock_ns(vm_clock));
192 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
193 }
194
195 qemu_co_queue_next(&bs->throttled_reqs);
196}
197
/* Return 1 if path starts with "<protocol>:", i.e. the first ':' comes
 * before any path separator; 0 otherwise. On Windows, drive specifications
 * are never treated as protocols and '\' also counts as a separator. */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* strcspn() stops at the first stop character or at the terminator */
    return path[strcspn(path, stop_chars)] == ':';
}
215
/* Return 1 if path is absolute, 0 otherwise. A leading '/' always counts;
 * on Windows so do a leading '\' and any drive specification. */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
228
/* Build in dest (capacity dest_size bytes) the path of filename as seen from
 * base_path.
 *
 * An absolute filename is copied unchanged. Otherwise the result is the
 * leading part of base_path, up to and including its last path separator or
 * its first ':' (whichever comes later), followed by filename. URLs are
 * supported through the ':' handling. Nothing is written when dest_size is
 * not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon;
    const char *after_sep;
    const char *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* just past the first ':' of base_path, or its start if there is none */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* just past the last separator of base_path, or its start */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep the longer of the two prefixes, clamped to the buffer */
    cut = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }

    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
272
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500273void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000274{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100275 /* Block drivers without coroutine functions need emulation */
276 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200277 bdrv->bdrv_co_readv = bdrv_co_readv_em;
278 bdrv->bdrv_co_writev = bdrv_co_writev_em;
279
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100280 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
281 * the block driver lacks aio we need to emulate that too.
282 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200283 if (!bdrv->bdrv_aio_readv) {
284 /* add AIO emulation layer */
285 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
286 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200287 }
bellard83f64092006-08-01 16:21:11 +0000288 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200289
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100290 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000291}
bellardb3380822004-03-14 21:38:54 +0000292
293/* create a new block device (by default it is empty) */
294BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000295{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100296 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000297
Anthony Liguori7267c092011-08-20 22:09:37 -0500298 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000299 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000300 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100301 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000302 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300303 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000304 return bs;
305}
306
bellardea2384d2004-08-01 21:59:26 +0000307BlockDriver *bdrv_find_format(const char *format_name)
308{
309 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100310 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
311 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000312 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100313 }
bellardea2384d2004-08-01 21:59:26 +0000314 }
315 return NULL;
316}
317
Markus Armbrustereb852012009-10-27 18:41:44 +0100318static int bdrv_is_whitelisted(BlockDriver *drv)
319{
320 static const char *whitelist[] = {
321 CONFIG_BDRV_WHITELIST
322 };
323 const char **p;
324
325 if (!whitelist[0])
326 return 1; /* no whitelist, anything goes */
327
328 for (p = whitelist; *p; p++) {
329 if (!strcmp(drv->format_name, *p)) {
330 return 1;
331 }
332 }
333 return 0;
334}
335
336BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
337{
338 BlockDriver *drv = bdrv_find_format(format_name);
339 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
340}
341
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800342typedef struct CreateCo {
343 BlockDriver *drv;
344 char *filename;
345 QEMUOptionParameter *options;
346 int ret;
347} CreateCo;
348
349static void coroutine_fn bdrv_create_co_entry(void *opaque)
350{
351 CreateCo *cco = opaque;
352 assert(cco->drv);
353
354 cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
355}
356
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200357int bdrv_create(BlockDriver *drv, const char* filename,
358 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000359{
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800360 int ret;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200361
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800362 Coroutine *co;
363 CreateCo cco = {
364 .drv = drv,
365 .filename = g_strdup(filename),
366 .options = options,
367 .ret = NOT_DONE,
368 };
369
370 if (!drv->bdrv_create) {
371 return -ENOTSUP;
372 }
373
374 if (qemu_in_coroutine()) {
375 /* Fast-path if already in coroutine context */
376 bdrv_create_co_entry(&cco);
377 } else {
378 co = qemu_coroutine_create(bdrv_create_co_entry);
379 qemu_coroutine_enter(co, &cco);
380 while (cco.ret == NOT_DONE) {
381 qemu_aio_wait();
382 }
383 }
384
385 ret = cco.ret;
386 g_free(cco.filename);
387
388 return ret;
bellardea2384d2004-08-01 21:59:26 +0000389}
390
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200391int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
392{
393 BlockDriver *drv;
394
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900395 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200396 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000397 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200398 }
399
400 return bdrv_create(drv, filename, options);
401}
402
/*
 * Create an empty temporary file and store its name in filename, a buffer of
 * size bytes.
 *
 * The function has no return value, so failure is reported through the
 * buffer: if the name does not fit or the file cannot be created, filename is
 * set to the empty string (provided size > 0). Nothing is written when size
 * is not positive.
 */
#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    char temp_name[MAX_PATH];
    DWORD dir_len;

    if (size <= 0) {
        return;
    }
    filename[0] = '\0';

    dir_len = GetTempPath(MAX_PATH, temp_dir);
    if (dir_len == 0 || dir_len > MAX_PATH) {
        return;
    }

    /* GetTempFileName() writes up to MAX_PATH characters, so it must not be
     * pointed at the caller's buffer, whose size it knows nothing about. */
    if (GetTempFileName(temp_dir, "qem", 0, temp_name) == 0) {
        return;
    }

    if (strlen(temp_name) >= (size_t)size) {
        /* the name cannot be handed back: do not leave the file behind */
        DeleteFile(temp_name);
        return;
    }
    pstrcpy(filename, size, temp_name);
}
#else
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int len;
    int fd;

    if (size <= 0) {
        return;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }

    /* A truncated template would no longer end in "XXXXXX" and mkstemp()
     * would reject it, so detect truncation up front. */
    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        filename[0] = '\0';
        return;
    }

    /* XXX: race condition possible: the file is closed here and later
     * reopened by name by the caller */
    fd = mkstemp(filename);
    if (fd < 0) {
        /* do not hand back a name that mkstemp() did not create */
        filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
bellardea2384d2004-08-01 21:59:26 +0000425
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200426/*
427 * Detect host devices. By convention, /dev/cdrom[N] is always
428 * recognized as a host CDROM.
429 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200430static BlockDriver *find_hdev_driver(const char *filename)
431{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200432 int score_max = 0, score;
433 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200434
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100435 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200436 if (d->bdrv_probe_device) {
437 score = d->bdrv_probe_device(filename);
438 if (score > score_max) {
439 score_max = score;
440 drv = d;
441 }
442 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200443 }
444
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200445 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200446}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200447
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900448BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200449{
450 BlockDriver *drv1;
451 char protocol[128];
452 int len;
453 const char *p;
454
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200455 /* TODO Drivers without bdrv_file_open must be specified explicitly */
456
Christoph Hellwig39508e72010-06-23 12:25:17 +0200457 /*
458 * XXX(hch): we really should not let host device detection
459 * override an explicit protocol specification, but moving this
460 * later breaks access to device names with colons in them.
461 * Thanks to the brain-dead persistent naming schemes on udev-
462 * based Linux systems those actually are quite common.
463 */
464 drv1 = find_hdev_driver(filename);
465 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200466 return drv1;
467 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200468
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000469 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200470 return bdrv_find_format("file");
471 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000472 p = strchr(filename, ':');
473 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200474 len = p - filename;
475 if (len > sizeof(protocol) - 1)
476 len = sizeof(protocol) - 1;
477 memcpy(protocol, filename, len);
478 protocol[len] = '\0';
479 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
480 if (drv1->protocol_name &&
481 !strcmp(drv1->protocol_name, protocol)) {
482 return drv1;
483 }
484 }
485 return NULL;
486}
487
Stefan Weilc98ac352010-07-21 21:51:51 +0200488static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000489{
bellard83f64092006-08-01 16:21:11 +0000490 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000491 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000492 uint8_t buf[2048];
493 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000494
Naphtali Spreif5edb012010-01-17 16:48:13 +0200495 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200496 if (ret < 0) {
497 *pdrv = NULL;
498 return ret;
499 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700500
Kevin Wolf08a00552010-06-01 18:37:31 +0200501 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
502 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700503 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200504 drv = bdrv_find_format("raw");
505 if (!drv) {
506 ret = -ENOENT;
507 }
508 *pdrv = drv;
509 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700510 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700511
bellard83f64092006-08-01 16:21:11 +0000512 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
513 bdrv_delete(bs);
514 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200515 *pdrv = NULL;
516 return ret;
bellard83f64092006-08-01 16:21:11 +0000517 }
518
bellardea2384d2004-08-01 21:59:26 +0000519 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200520 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100521 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000522 if (drv1->bdrv_probe) {
523 score = drv1->bdrv_probe(buf, ret, filename);
524 if (score > score_max) {
525 score_max = score;
526 drv = drv1;
527 }
bellardea2384d2004-08-01 21:59:26 +0000528 }
529 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200530 if (!drv) {
531 ret = -ENOENT;
532 }
533 *pdrv = drv;
534 return ret;
bellardea2384d2004-08-01 21:59:26 +0000535}
536
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100537/**
538 * Set the current 'total_sectors' value
539 */
540static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
541{
542 BlockDriver *drv = bs->drv;
543
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700544 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
545 if (bs->sg)
546 return 0;
547
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100548 /* query actual device if possible, otherwise just trust the hint */
549 if (drv->bdrv_getlength) {
550 int64_t length = drv->bdrv_getlength(bs);
551 if (length < 0) {
552 return length;
553 }
554 hint = length >> BDRV_SECTOR_BITS;
555 }
556
557 bs->total_sectors = hint;
558 return 0;
559}
560
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100561/**
562 * Set open flags for a given cache mode
563 *
564 * Return 0 on success, -1 if the cache mode was invalid.
565 */
566int bdrv_parse_cache_flags(const char *mode, int *flags)
567{
568 *flags &= ~BDRV_O_CACHE_MASK;
569
570 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
571 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100572 } else if (!strcmp(mode, "directsync")) {
573 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100574 } else if (!strcmp(mode, "writeback")) {
575 *flags |= BDRV_O_CACHE_WB;
576 } else if (!strcmp(mode, "unsafe")) {
577 *flags |= BDRV_O_CACHE_WB;
578 *flags |= BDRV_O_NO_FLUSH;
579 } else if (!strcmp(mode, "writethrough")) {
580 /* this is the default */
581 } else {
582 return -1;
583 }
584
585 return 0;
586}
587
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000588/**
589 * The copy-on-read flag is actually a reference count so multiple users may
590 * use the feature without worrying about clobbering its previous state.
591 * Copy-on-read stays enabled until all users have called to disable it.
592 */
593void bdrv_enable_copy_on_read(BlockDriverState *bs)
594{
595 bs->copy_on_read++;
596}
597
598void bdrv_disable_copy_on_read(BlockDriverState *bs)
599{
600 assert(bs->copy_on_read > 0);
601 bs->copy_on_read--;
602}
603
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200604/*
Kevin Wolf57915332010-04-14 15:24:50 +0200605 * Common part for opening disk images and files
606 */
607static int bdrv_open_common(BlockDriverState *bs, const char *filename,
608 int flags, BlockDriver *drv)
609{
610 int ret, open_flags;
611
612 assert(drv != NULL);
Paolo Bonzini64058752012-05-08 16:51:49 +0200613 assert(bs->file == NULL);
Kevin Wolf57915332010-04-14 15:24:50 +0200614
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100615 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
616
Kevin Wolf57915332010-04-14 15:24:50 +0200617 bs->open_flags = flags;
Kevin Wolf57915332010-04-14 15:24:50 +0200618 bs->buffer_alignment = 512;
619
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000620 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
621 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
622 bdrv_enable_copy_on_read(bs);
623 }
624
Kevin Wolf57915332010-04-14 15:24:50 +0200625 pstrcpy(bs->filename, sizeof(bs->filename), filename);
626
627 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
628 return -ENOTSUP;
629 }
630
631 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500632 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200633
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100634 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200635
636 /*
637 * Clear flags that are internal to the block layer before opening the
638 * image.
639 */
640 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
641
642 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200643 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200644 */
645 if (bs->is_temporary) {
646 open_flags |= BDRV_O_RDWR;
647 }
648
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100649 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
650
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200651 /* Open the image, either directly or using a protocol */
652 if (drv->bdrv_file_open) {
653 ret = drv->bdrv_file_open(bs, filename, open_flags);
654 } else {
655 ret = bdrv_file_open(&bs->file, filename, open_flags);
656 if (ret >= 0) {
657 ret = drv->bdrv_open(bs, open_flags);
658 }
659 }
660
Kevin Wolf57915332010-04-14 15:24:50 +0200661 if (ret < 0) {
662 goto free_and_fail;
663 }
664
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100665 ret = refresh_total_sectors(bs, bs->total_sectors);
666 if (ret < 0) {
667 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200668 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100669
Kevin Wolf57915332010-04-14 15:24:50 +0200670#ifndef _WIN32
671 if (bs->is_temporary) {
672 unlink(filename);
673 }
674#endif
675 return 0;
676
677free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200678 if (bs->file) {
679 bdrv_delete(bs->file);
680 bs->file = NULL;
681 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500682 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200683 bs->opaque = NULL;
684 bs->drv = NULL;
685 return ret;
686}
687
688/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200689 * Opens a file using a protocol (file, host_device, nbd, ...)
690 */
bellard83f64092006-08-01 16:21:11 +0000691int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000692{
bellard83f64092006-08-01 16:21:11 +0000693 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200694 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000695 int ret;
696
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900697 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200698 if (!drv) {
699 return -ENOENT;
700 }
701
bellard83f64092006-08-01 16:21:11 +0000702 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200703 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000704 if (ret < 0) {
705 bdrv_delete(bs);
706 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000707 }
aliguori71d07702009-03-03 17:37:16 +0000708 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000709 *pbs = bs;
710 return 0;
bellardea2384d2004-08-01 21:59:26 +0000711}
bellardfc01f7e2003-06-30 10:03:06 +0000712
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200713/*
714 * Opens a disk image (raw, qcow2, vmdk, ...)
715 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200716int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
717 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000718{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200719 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200720 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000721
bellard83f64092006-08-01 16:21:11 +0000722 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000723 BlockDriverState *bs1;
724 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000725 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200726 BlockDriver *bdrv_qcow2;
727 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200728 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000729
bellardea2384d2004-08-01 21:59:26 +0000730 /* if snapshot, we create a temporary backing file and open it
731 instead of opening 'filename' directly */
732
733 /* if there is a backing file, use it */
734 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200735 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000736 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000737 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000738 return ret;
bellardea2384d2004-08-01 21:59:26 +0000739 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200740 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000741
742 if (bs1->drv && bs1->drv->protocol_name)
743 is_protocol = 1;
744
bellardea2384d2004-08-01 21:59:26 +0000745 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000746
bellardea2384d2004-08-01 21:59:26 +0000747 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000748
749 /* Real path is meaningless for protocols */
750 if (is_protocol)
751 snprintf(backing_filename, sizeof(backing_filename),
752 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000753 else if (!realpath(filename, backing_filename))
754 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000755
Kevin Wolf91a073a2009-05-27 14:48:06 +0200756 bdrv_qcow2 = bdrv_find_format("qcow2");
757 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
758
Jes Sorensen3e829902010-05-27 16:20:30 +0200759 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200760 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
761 if (drv) {
762 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
763 drv->format_name);
764 }
765
766 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200767 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000768 if (ret < 0) {
769 return ret;
bellardea2384d2004-08-01 21:59:26 +0000770 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200771
bellardea2384d2004-08-01 21:59:26 +0000772 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200773 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000774 bs->is_temporary = 1;
775 }
bellard712e7872005-04-28 21:09:32 +0000776
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200777 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200778 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200779 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000780 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100781
aliguori51d7c002009-03-05 23:00:29 +0000782 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000783 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000784 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200785
786 /* Open the image */
787 ret = bdrv_open_common(bs, filename, flags, drv);
788 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100789 goto unlink_and_fail;
790 }
791
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200792 /* If there is a backing file, use it */
793 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
794 char backing_filename[PATH_MAX];
795 int back_flags;
796 BlockDriver *back_drv = NULL;
797
798 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000799
800 if (path_has_protocol(bs->backing_file)) {
801 pstrcpy(backing_filename, sizeof(backing_filename),
802 bs->backing_file);
803 } else {
804 path_combine(backing_filename, sizeof(backing_filename),
805 filename, bs->backing_file);
806 }
807
808 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200809 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000810 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200811
812 /* backing files always opened read-only */
813 back_flags =
814 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
815
816 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
817 if (ret < 0) {
818 bdrv_close(bs);
819 return ret;
820 }
821 if (bs->is_temporary) {
822 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
823 } else {
824 /* base image inherits from "parent" */
825 bs->backing_hd->keep_read_only = bs->keep_read_only;
826 }
827 }
828
829 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200830 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200831 }
832
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800833 /* throttling disk I/O limits */
834 if (bs->io_limits_enabled) {
835 bdrv_io_limits_enable(bs);
836 }
837
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200838 return 0;
839
840unlink_and_fail:
841 if (bs->is_temporary) {
842 unlink(filename);
843 }
844 return ret;
845}
846
bellardfc01f7e2003-06-30 10:03:06 +0000847void bdrv_close(BlockDriverState *bs)
848{
Liu Yuan80ccf932012-04-20 17:10:56 +0800849 bdrv_flush(bs);
bellard19cb3732006-08-19 11:45:59 +0000850 if (bs->drv) {
Paolo Bonzini3e914652012-03-30 13:17:11 +0200851 if (bs->job) {
852 block_job_cancel_sync(bs->job);
853 }
Kevin Wolf7094f122012-04-11 11:06:37 +0200854 bdrv_drain_all();
855
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200856 if (bs == bs_snapshots) {
857 bs_snapshots = NULL;
858 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100859 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000860 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100861 bs->backing_hd = NULL;
862 }
bellardea2384d2004-08-01 21:59:26 +0000863 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500864 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000865#ifdef _WIN32
866 if (bs->is_temporary) {
867 unlink(bs->filename);
868 }
bellard67b915a2004-03-31 23:37:16 +0000869#endif
bellardea2384d2004-08-01 21:59:26 +0000870 bs->opaque = NULL;
871 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000872 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +0200873 bs->backing_file[0] = '\0';
874 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +0200875 bs->total_sectors = 0;
876 bs->encrypted = 0;
877 bs->valid_key = 0;
878 bs->sg = 0;
879 bs->growable = 0;
bellardb3380822004-03-14 21:38:54 +0000880
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200881 if (bs->file != NULL) {
Paolo Bonzini0ac93772012-05-08 16:51:44 +0200882 bdrv_delete(bs->file);
883 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200884 }
885
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200886 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000887 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800888
889 /*throttling disk I/O limits*/
890 if (bs->io_limits_enabled) {
891 bdrv_io_limits_disable(bs);
892 }
bellardb3380822004-03-14 21:38:54 +0000893}
894
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900895void bdrv_close_all(void)
896{
897 BlockDriverState *bs;
898
899 QTAILQ_FOREACH(bs, &bdrv_states, list) {
900 bdrv_close(bs);
901 }
902}
903
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000904/*
905 * Wait for pending requests to complete across all BlockDriverStates
906 *
907 * This function does not flush data to disk, use bdrv_flush_all() for that
908 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +0200909 *
910 * Note that completion of an asynchronous I/O operation can trigger any
911 * number of other I/O operations on other devices---for example a coroutine
912 * can be arbitrarily complex and a constant flow of I/O can come until the
913 * coroutine is complete. Because of this, it is not possible to have a
914 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000915 */
916void bdrv_drain_all(void)
917{
918 BlockDriverState *bs;
Zhi Yong Wu4c355d52012-04-12 14:00:57 +0200919 bool busy;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000920
Zhi Yong Wu4c355d52012-04-12 14:00:57 +0200921 do {
922 busy = qemu_aio_wait();
923
924 /* FIXME: We do not have timer support here, so this is effectively
925 * a busy wait.
926 */
927 QTAILQ_FOREACH(bs, &bdrv_states, list) {
928 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
929 qemu_co_queue_restart_all(&bs->throttled_reqs);
930 busy = true;
931 }
932 }
933 } while (busy);
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000934
935 /* If requests are still pending there is a bug somewhere */
936 QTAILQ_FOREACH(bs, &bdrv_states, list) {
937 assert(QLIST_EMPTY(&bs->tracked_requests));
938 assert(qemu_co_queue_empty(&bs->throttled_reqs));
939 }
940}
941
Ryan Harperd22b2f42011-03-29 20:51:47 -0500942/* make a BlockDriverState anonymous by removing from bdrv_state list.
943 Also, NULL terminate the device_name to prevent double remove */
944void bdrv_make_anon(BlockDriverState *bs)
945{
946 if (bs->device_name[0] != '\0') {
947 QTAILQ_REMOVE(&bdrv_states, bs, list);
948 }
949 bs->device_name[0] = '\0';
950}
951
Paolo Bonzinie023b2e2012-05-08 16:51:41 +0200952static void bdrv_rebind(BlockDriverState *bs)
953{
954 if (bs->drv && bs->drv->bdrv_rebind) {
955 bs->drv->bdrv_rebind(bs);
956 }
957}
958
Jeff Cody8802d1f2012-02-28 15:54:06 -0500959/*
960 * Add new bs contents at the top of an image chain while the chain is
961 * live, while keeping required fields on the top layer.
962 *
963 * This will modify the BlockDriverState fields, and swap contents
964 * between bs_new and bs_top. Both bs_new and bs_top are modified.
965 *
Jeff Codyf6801b82012-03-27 16:30:19 -0400966 * bs_new is required to be anonymous.
967 *
Jeff Cody8802d1f2012-02-28 15:54:06 -0500968 * This function does not create any image files.
969 */
970void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
971{
972 BlockDriverState tmp;
973
Jeff Codyf6801b82012-03-27 16:30:19 -0400974 /* bs_new must be anonymous */
975 assert(bs_new->device_name[0] == '\0');
Jeff Cody8802d1f2012-02-28 15:54:06 -0500976
977 tmp = *bs_new;
978
979 /* there are some fields that need to stay on the top layer: */
Paolo Bonzini3a389e72012-05-08 16:51:42 +0200980 tmp.open_flags = bs_top->open_flags;
Jeff Cody8802d1f2012-02-28 15:54:06 -0500981
982 /* dev info */
983 tmp.dev_ops = bs_top->dev_ops;
984 tmp.dev_opaque = bs_top->dev_opaque;
985 tmp.dev = bs_top->dev;
986 tmp.buffer_alignment = bs_top->buffer_alignment;
987 tmp.copy_on_read = bs_top->copy_on_read;
988
989 /* i/o timing parameters */
990 tmp.slice_time = bs_top->slice_time;
991 tmp.slice_start = bs_top->slice_start;
992 tmp.slice_end = bs_top->slice_end;
993 tmp.io_limits = bs_top->io_limits;
994 tmp.io_base = bs_top->io_base;
995 tmp.throttled_reqs = bs_top->throttled_reqs;
996 tmp.block_timer = bs_top->block_timer;
997 tmp.io_limits_enabled = bs_top->io_limits_enabled;
998
999 /* geometry */
1000 tmp.cyls = bs_top->cyls;
1001 tmp.heads = bs_top->heads;
1002 tmp.secs = bs_top->secs;
1003 tmp.translation = bs_top->translation;
1004
1005 /* r/w error */
1006 tmp.on_read_error = bs_top->on_read_error;
1007 tmp.on_write_error = bs_top->on_write_error;
1008
1009 /* i/o status */
1010 tmp.iostatus_enabled = bs_top->iostatus_enabled;
1011 tmp.iostatus = bs_top->iostatus;
1012
1013 /* keep the same entry in bdrv_states */
1014 pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
1015 tmp.list = bs_top->list;
1016
1017 /* The contents of 'tmp' will become bs_top, as we are
1018 * swapping bs_new and bs_top contents. */
1019 tmp.backing_hd = bs_new;
1020 pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
Jeff Codyf6801b82012-03-27 16:30:19 -04001021 bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
Jeff Cody8802d1f2012-02-28 15:54:06 -05001022
1023 /* swap contents of the fixed new bs and the current top */
1024 *bs_new = *bs_top;
1025 *bs_top = tmp;
1026
Jeff Codyf6801b82012-03-27 16:30:19 -04001027 /* device_name[] was carried over from the old bs_top. bs_new
1028 * shouldn't be in bdrv_states, so we need to make device_name[]
1029 * reflect the anonymity of bs_new
1030 */
1031 bs_new->device_name[0] = '\0';
1032
Jeff Cody8802d1f2012-02-28 15:54:06 -05001033 /* clear the copied fields in the new backing file */
1034 bdrv_detach_dev(bs_new, bs_new->dev);
1035
1036 qemu_co_queue_init(&bs_new->throttled_reqs);
1037 memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
1038 memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
1039 bdrv_iostatus_disable(bs_new);
1040
1041 /* we don't use bdrv_io_limits_disable() for this, because we don't want
1042 * to affect or delete the block_timer, as it has been moved to bs_top */
1043 bs_new->io_limits_enabled = false;
1044 bs_new->block_timer = NULL;
1045 bs_new->slice_time = 0;
1046 bs_new->slice_start = 0;
1047 bs_new->slice_end = 0;
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001048
1049 bdrv_rebind(bs_new);
1050 bdrv_rebind(bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05001051}
1052
bellardb3380822004-03-14 21:38:54 +00001053void bdrv_delete(BlockDriverState *bs)
1054{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001055 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02001056 assert(!bs->job);
1057 assert(!bs->in_use);
Markus Armbruster18846de2010-06-29 16:58:30 +02001058
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001059 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05001060 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00001061
bellardb3380822004-03-14 21:38:54 +00001062 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001063
Markus Armbrusterf9092b12010-06-25 10:33:39 +02001064 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -05001065 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00001066}
1067
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001068int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1069/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02001070{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001071 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02001072 return -EBUSY;
1073 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001074 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03001075 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +02001076 return 0;
1077}
1078
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001079/* TODO qdevified devices don't use this, remove when devices are qdevified */
1080void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02001081{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001082 if (bdrv_attach_dev(bs, dev) < 0) {
1083 abort();
1084 }
1085}
1086
1087void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1088/* TODO change to DeviceState *dev when all users are qdevified */
1089{
1090 assert(bs->dev == dev);
1091 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001092 bs->dev_ops = NULL;
1093 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +02001094 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02001095}
1096
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001097/* TODO change to return DeviceState * when all users are qdevified */
1098void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02001099{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001100 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02001101}
1102
Markus Armbruster0e49de52011-08-03 15:07:41 +02001103void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1104 void *opaque)
1105{
1106 bs->dev_ops = ops;
1107 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001108 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1109 bs_snapshots = NULL;
1110 }
Markus Armbruster0e49de52011-08-03 15:07:41 +02001111}
1112
Luiz Capitulino329c0a42012-01-25 16:59:43 -02001113void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1114 BlockQMPEventAction action, int is_read)
1115{
1116 QObject *data;
1117 const char *action_str;
1118
1119 switch (action) {
1120 case BDRV_ACTION_REPORT:
1121 action_str = "report";
1122 break;
1123 case BDRV_ACTION_IGNORE:
1124 action_str = "ignore";
1125 break;
1126 case BDRV_ACTION_STOP:
1127 action_str = "stop";
1128 break;
1129 default:
1130 abort();
1131 }
1132
1133 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1134 bdrv->device_name,
1135 action_str,
1136 is_read ? "read" : "write");
1137 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1138
1139 qobject_decref(data);
1140}
1141
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001142static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1143{
1144 QObject *data;
1145
1146 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1147 bdrv_get_device_name(bs), ejected);
1148 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1149
1150 qobject_decref(data);
1151}
1152
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001153static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02001154{
Markus Armbruster145feb12011-08-03 15:07:42 +02001155 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001156 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001157 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001158 if (tray_was_closed) {
1159 /* tray open */
1160 bdrv_emit_qmp_eject_event(bs, true);
1161 }
1162 if (load) {
1163 /* tray close */
1164 bdrv_emit_qmp_eject_event(bs, false);
1165 }
Markus Armbruster145feb12011-08-03 15:07:42 +02001166 }
1167}
1168
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001169bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1170{
1171 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1172}
1173
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01001174void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1175{
1176 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1177 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1178 }
1179}
1180
Markus Armbrustere4def802011-09-06 18:58:53 +02001181bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1182{
1183 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1184 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1185 }
1186 return false;
1187}
1188
Markus Armbruster145feb12011-08-03 15:07:42 +02001189static void bdrv_dev_resize_cb(BlockDriverState *bs)
1190{
1191 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1192 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02001193 }
1194}
1195
Markus Armbrusterf1076392011-09-06 18:58:46 +02001196bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1197{
1198 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1199 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1200 }
1201 return false;
1202}
1203
aliguorie97fc192009-04-21 23:11:50 +00001204/*
1205 * Run consistency checks on an image
1206 *
Kevin Wolfe076f332010-06-29 11:43:13 +02001207 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02001208 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02001209 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00001210 */
Kevin Wolfe076f332010-06-29 11:43:13 +02001211int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +00001212{
1213 if (bs->drv->bdrv_check == NULL) {
1214 return -ENOTSUP;
1215 }
1216
Kevin Wolfe076f332010-06-29 11:43:13 +02001217 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +02001218 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +00001219}
1220
Kevin Wolf8a426612010-07-16 17:17:01 +02001221#define COMMIT_BUF_SECTORS 2048
1222
bellard33e39632003-07-06 17:15:21 +00001223/* commit COW file into the raw image */
1224int bdrv_commit(BlockDriverState *bs)
1225{
bellard19cb3732006-08-19 11:45:59 +00001226 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +02001227 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001228 int64_t sector, total_sectors;
1229 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001230 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001231 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001232 char filename[1024];
1233 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +00001234
bellard19cb3732006-08-19 11:45:59 +00001235 if (!drv)
1236 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001237
1238 if (!bs->backing_hd) {
1239 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001240 }
1241
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001242 if (bs->backing_hd->keep_read_only) {
1243 return -EACCES;
1244 }
Kevin Wolfee181192010-08-05 13:05:22 +02001245
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00001246 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1247 return -EBUSY;
1248 }
1249
Kevin Wolfee181192010-08-05 13:05:22 +02001250 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001251 ro = bs->backing_hd->read_only;
1252 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1253 open_flags = bs->backing_hd->open_flags;
1254
1255 if (ro) {
1256 /* re-open as RW */
1257 bdrv_delete(bs->backing_hd);
1258 bs->backing_hd = NULL;
1259 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001260 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1261 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001262 if (rw_ret < 0) {
1263 bdrv_delete(bs_rw);
1264 /* try to re-open read-only */
1265 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001266 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1267 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001268 if (ret < 0) {
1269 bdrv_delete(bs_ro);
1270 /* drive not functional anymore */
1271 bs->drv = NULL;
1272 return ret;
1273 }
1274 bs->backing_hd = bs_ro;
1275 return rw_ret;
1276 }
1277 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001278 }
bellardea2384d2004-08-01 21:59:26 +00001279
Jan Kiszka6ea44302009-11-30 18:21:19 +01001280 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001281 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001282
Kevin Wolf8a426612010-07-16 17:17:01 +02001283 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001284 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001285
1286 if (bdrv_read(bs, sector, buf, n) != 0) {
1287 ret = -EIO;
1288 goto ro_cleanup;
1289 }
1290
1291 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1292 ret = -EIO;
1293 goto ro_cleanup;
1294 }
bellardea2384d2004-08-01 21:59:26 +00001295 }
1296 }
bellard95389c82005-12-18 18:28:15 +00001297
Christoph Hellwig1d449522010-01-17 12:32:30 +01001298 if (drv->bdrv_make_empty) {
1299 ret = drv->bdrv_make_empty(bs);
1300 bdrv_flush(bs);
1301 }
bellard95389c82005-12-18 18:28:15 +00001302
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001303 /*
1304 * Make sure all data we wrote to the backing device is actually
1305 * stable on disk.
1306 */
1307 if (bs->backing_hd)
1308 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001309
1310ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001311 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001312
1313 if (ro) {
1314 /* re-open as RO */
1315 bdrv_delete(bs->backing_hd);
1316 bs->backing_hd = NULL;
1317 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001318 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1319 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001320 if (ret < 0) {
1321 bdrv_delete(bs_ro);
1322 /* drive not functional anymore */
1323 bs->drv = NULL;
1324 return ret;
1325 }
1326 bs->backing_hd = bs_ro;
1327 bs->backing_hd->keep_read_only = 0;
1328 }
1329
Christoph Hellwig1d449522010-01-17 12:32:30 +01001330 return ret;
bellard33e39632003-07-06 17:15:21 +00001331}
1332
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001333int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001334{
1335 BlockDriverState *bs;
1336
1337 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001338 int ret = bdrv_commit(bs);
1339 if (ret < 0) {
1340 return ret;
1341 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001342 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001343 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001344}
1345
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001346struct BdrvTrackedRequest {
1347 BlockDriverState *bs;
1348 int64_t sector_num;
1349 int nb_sectors;
1350 bool is_write;
1351 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001352 Coroutine *co; /* owner, used for deadlock detection */
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001353 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001354};
1355
1356/**
1357 * Remove an active request from the tracked requests list
1358 *
1359 * This function should be called when a tracked request is completing.
1360 */
1361static void tracked_request_end(BdrvTrackedRequest *req)
1362{
1363 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001364 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001365}
1366
1367/**
1368 * Add an active request to the tracked requests list
1369 */
1370static void tracked_request_begin(BdrvTrackedRequest *req,
1371 BlockDriverState *bs,
1372 int64_t sector_num,
1373 int nb_sectors, bool is_write)
1374{
1375 *req = (BdrvTrackedRequest){
1376 .bs = bs,
1377 .sector_num = sector_num,
1378 .nb_sectors = nb_sectors,
1379 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001380 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001381 };
1382
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001383 qemu_co_queue_init(&req->wait_queue);
1384
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001385 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1386}
1387
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001388/**
1389 * Round a region to cluster boundaries
1390 */
1391static void round_to_clusters(BlockDriverState *bs,
1392 int64_t sector_num, int nb_sectors,
1393 int64_t *cluster_sector_num,
1394 int *cluster_nb_sectors)
1395{
1396 BlockDriverInfo bdi;
1397
1398 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1399 *cluster_sector_num = sector_num;
1400 *cluster_nb_sectors = nb_sectors;
1401 } else {
1402 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1403 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1404 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1405 nb_sectors, c);
1406 }
1407}
1408
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001409static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1410 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001411 /* aaaa bbbb */
1412 if (sector_num >= req->sector_num + req->nb_sectors) {
1413 return false;
1414 }
1415 /* bbbb aaaa */
1416 if (req->sector_num >= sector_num + nb_sectors) {
1417 return false;
1418 }
1419 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001420}
1421
1422static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1423 int64_t sector_num, int nb_sectors)
1424{
1425 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001426 int64_t cluster_sector_num;
1427 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001428 bool retry;
1429
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001430 /* If we touch the same cluster it counts as an overlap. This guarantees
1431 * that allocating writes will be serialized and not race with each other
1432 * for the same cluster. For example, in copy-on-read it ensures that the
1433 * CoR read and write operations are atomic and guest writes cannot
1434 * interleave between them.
1435 */
1436 round_to_clusters(bs, sector_num, nb_sectors,
1437 &cluster_sector_num, &cluster_nb_sectors);
1438
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001439 do {
1440 retry = false;
1441 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001442 if (tracked_request_overlaps(req, cluster_sector_num,
1443 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001444 /* Hitting this means there was a reentrant request, for
1445 * example, a block driver issuing nested requests. This must
1446 * never happen since it means deadlock.
1447 */
1448 assert(qemu_coroutine_self() != req->co);
1449
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001450 qemu_co_queue_wait(&req->wait_queue);
1451 retry = true;
1452 break;
1453 }
1454 }
1455 } while (retry);
1456}
1457
Kevin Wolf756e6732010-01-12 12:55:17 +01001458/*
1459 * Return values:
1460 * 0 - success
1461 * -EINVAL - backing format specified, but no file
1462 * -ENOSPC - can't update the backing file because no space is left in the
1463 * image file header
1464 * -ENOTSUP - format driver doesn't support changing the backing file
1465 */
1466int bdrv_change_backing_file(BlockDriverState *bs,
1467 const char *backing_file, const char *backing_fmt)
1468{
1469 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02001470 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01001471
Paolo Bonzini5f377792012-04-12 14:01:01 +02001472 /* Backing file format doesn't make sense without a backing file */
1473 if (backing_fmt && !backing_file) {
1474 return -EINVAL;
1475 }
1476
Kevin Wolf756e6732010-01-12 12:55:17 +01001477 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02001478 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01001479 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02001480 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01001481 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02001482
1483 if (ret == 0) {
1484 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1485 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1486 }
1487 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01001488}
1489
aliguori71d07702009-03-03 17:37:16 +00001490static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1491 size_t size)
1492{
1493 int64_t len;
1494
1495 if (!bdrv_is_inserted(bs))
1496 return -ENOMEDIUM;
1497
1498 if (bs->growable)
1499 return 0;
1500
1501 len = bdrv_getlength(bs);
1502
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001503 if (offset < 0)
1504 return -EIO;
1505
1506 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001507 return -EIO;
1508
1509 return 0;
1510}
1511
1512static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1513 int nb_sectors)
1514{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001515 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1516 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001517}
1518
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001519typedef struct RwCo {
1520 BlockDriverState *bs;
1521 int64_t sector_num;
1522 int nb_sectors;
1523 QEMUIOVector *qiov;
1524 bool is_write;
1525 int ret;
1526} RwCo;
1527
1528static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1529{
1530 RwCo *rwco = opaque;
1531
1532 if (!rwco->is_write) {
1533 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001534 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001535 } else {
1536 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001537 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001538 }
1539}
1540
1541/*
1542 * Process a synchronous request using coroutines
1543 */
1544static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1545 int nb_sectors, bool is_write)
1546{
1547 QEMUIOVector qiov;
1548 struct iovec iov = {
1549 .iov_base = (void *)buf,
1550 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1551 };
1552 Coroutine *co;
1553 RwCo rwco = {
1554 .bs = bs,
1555 .sector_num = sector_num,
1556 .nb_sectors = nb_sectors,
1557 .qiov = &qiov,
1558 .is_write = is_write,
1559 .ret = NOT_DONE,
1560 };
1561
1562 qemu_iovec_init_external(&qiov, &iov, 1);
1563
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001564 /**
1565 * In sync call context, when the vcpu is blocked, this throttling timer
1566 * will not fire; so the I/O throttling function has to be disabled here
1567 * if it has been enabled.
1568 */
1569 if (bs->io_limits_enabled) {
1570 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1571 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1572 bdrv_io_limits_disable(bs);
1573 }
1574
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001575 if (qemu_in_coroutine()) {
1576 /* Fast-path if already in coroutine context */
1577 bdrv_rw_co_entry(&rwco);
1578 } else {
1579 co = qemu_coroutine_create(bdrv_rw_co_entry);
1580 qemu_coroutine_enter(co, &rwco);
1581 while (rwco.ret == NOT_DONE) {
1582 qemu_aio_wait();
1583 }
1584 }
1585 return rwco.ret;
1586}
1587
bellard19cb3732006-08-19 11:45:59 +00001588/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001589int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001590 uint8_t *buf, int nb_sectors)
1591{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001592 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001593}
1594
Paolo Bonzini71df14f2012-04-12 14:01:04 +02001595#define BITS_PER_LONG (sizeof(unsigned long) * 8)
1596
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001597static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001598 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001599{
1600 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001601 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001602
Jan Kiszka6ea44302009-11-30 18:21:19 +01001603 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001604 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001605
1606 for (; start <= end; start++) {
Paolo Bonzini71df14f2012-04-12 14:01:04 +02001607 idx = start / BITS_PER_LONG;
1608 bit = start % BITS_PER_LONG;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001609 val = bs->dirty_bitmap[idx];
1610 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001611 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001612 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001613 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001614 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001615 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001616 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001617 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001618 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001619 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001620 }
1621 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001622 }
1623}
1624
ths5fafdf22007-09-16 21:08:06 +00001625/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001626 -EIO generic I/O error (may happen for all errors)
1627 -ENOMEDIUM No media inserted.
1628 -EINVAL Invalid sector number or nb_sectors
1629 -EACCES Trying to write a read-only device
1630*/
ths5fafdf22007-09-16 21:08:06 +00001631int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001632 const uint8_t *buf, int nb_sectors)
1633{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001634 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001635}
1636
aliguorieda578e2009-03-12 19:57:16 +00001637int bdrv_pread(BlockDriverState *bs, int64_t offset,
1638 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001639{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001640 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001641 int len, nb_sectors, count;
1642 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001643 int ret;
bellard83f64092006-08-01 16:21:11 +00001644
1645 count = count1;
1646 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001647 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001648 if (len > count)
1649 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001650 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001651 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001652 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1653 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001654 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001655 count -= len;
1656 if (count == 0)
1657 return count1;
1658 sector_num++;
1659 buf += len;
1660 }
1661
1662 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001663 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001664 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001665 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1666 return ret;
bellard83f64092006-08-01 16:21:11 +00001667 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001668 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001669 buf += len;
1670 count -= len;
1671 }
1672
1673 /* add data from the last sector */
1674 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001675 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1676 return ret;
bellard83f64092006-08-01 16:21:11 +00001677 memcpy(buf, tmp_buf, count);
1678 }
1679 return count1;
1680}
1681
aliguorieda578e2009-03-12 19:57:16 +00001682int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1683 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001684{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001685 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001686 int len, nb_sectors, count;
1687 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001688 int ret;
bellard83f64092006-08-01 16:21:11 +00001689
1690 count = count1;
1691 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001692 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001693 if (len > count)
1694 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001695 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001696 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001697 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1698 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001699 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001700 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1701 return ret;
bellard83f64092006-08-01 16:21:11 +00001702 count -= len;
1703 if (count == 0)
1704 return count1;
1705 sector_num++;
1706 buf += len;
1707 }
1708
1709 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001710 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001711 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001712 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1713 return ret;
bellard83f64092006-08-01 16:21:11 +00001714 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001715 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001716 buf += len;
1717 count -= len;
1718 }
1719
1720 /* add data from the last sector */
1721 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001722 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1723 return ret;
bellard83f64092006-08-01 16:21:11 +00001724 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001725 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1726 return ret;
bellard83f64092006-08-01 16:21:11 +00001727 }
1728 return count1;
1729}
bellard83f64092006-08-01 16:21:11 +00001730
Kevin Wolff08145f2010-06-16 16:38:15 +02001731/*
1732 * Writes to the file and ensures that no writes are reordered across this
1733 * request (acts as a barrier)
1734 *
1735 * Returns 0 on success, -errno in error cases.
1736 */
1737int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1738 const void *buf, int count)
1739{
1740 int ret;
1741
1742 ret = bdrv_pwrite(bs, offset, buf, count);
1743 if (ret < 0) {
1744 return ret;
1745 }
1746
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001747 /* No flush needed for cache modes that use O_DSYNC */
1748 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001749 bdrv_flush(bs);
1750 }
1751
1752 return 0;
1753}
1754
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001755static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001756 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1757{
1758 /* Perform I/O through a temporary buffer so that users who scribble over
1759 * their read buffer while the operation is in progress do not end up
1760 * modifying the image file. This is critical for zero-copy guest I/O
1761 * where anything might happen inside guest memory.
1762 */
1763 void *bounce_buffer;
1764
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001765 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00001766 struct iovec iov;
1767 QEMUIOVector bounce_qiov;
1768 int64_t cluster_sector_num;
1769 int cluster_nb_sectors;
1770 size_t skip_bytes;
1771 int ret;
1772
1773 /* Cover entire cluster so no additional backing file I/O is required when
1774 * allocating cluster in the image file.
1775 */
1776 round_to_clusters(bs, sector_num, nb_sectors,
1777 &cluster_sector_num, &cluster_nb_sectors);
1778
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001779 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1780 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001781
1782 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1783 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1784 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1785
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001786 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1787 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001788 if (ret < 0) {
1789 goto err;
1790 }
1791
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001792 if (drv->bdrv_co_write_zeroes &&
1793 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01001794 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
1795 cluster_nb_sectors);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001796 } else {
1797 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001798 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001799 }
1800
Stefan Hajnocziab185922011-11-17 13:40:31 +00001801 if (ret < 0) {
1802 /* It might be okay to ignore write errors for guest requests. If this
1803 * is a deliberate copy-on-read then we don't want to ignore the error.
1804 * Simply report it in all cases.
1805 */
1806 goto err;
1807 }
1808
1809 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1810 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1811 nb_sectors * BDRV_SECTOR_SIZE);
1812
1813err:
1814 qemu_vfree(bounce_buffer);
1815 return ret;
1816}
1817
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001818/*
1819 * Handle a read request in coroutine context
1820 */
1821static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001822 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1823 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001824{
1825 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001826 BdrvTrackedRequest req;
1827 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001828
Kevin Wolfda1fa912011-07-14 17:27:13 +02001829 if (!drv) {
1830 return -ENOMEDIUM;
1831 }
1832 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1833 return -EIO;
1834 }
1835
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001836 /* throttling disk read I/O */
1837 if (bs->io_limits_enabled) {
1838 bdrv_io_limits_intercept(bs, false, nb_sectors);
1839 }
1840
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001841 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001842 flags |= BDRV_REQ_COPY_ON_READ;
1843 }
1844 if (flags & BDRV_REQ_COPY_ON_READ) {
1845 bs->copy_on_read_in_flight++;
1846 }
1847
1848 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001849 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1850 }
1851
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001852 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001853
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001854 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00001855 int pnum;
1856
1857 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1858 if (ret < 0) {
1859 goto out;
1860 }
1861
1862 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001863 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001864 goto out;
1865 }
1866 }
1867
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001868 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001869
1870out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001871 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001872
1873 if (flags & BDRV_REQ_COPY_ON_READ) {
1874 bs->copy_on_read_in_flight--;
1875 }
1876
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001877 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001878}
1879
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001880int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001881 int nb_sectors, QEMUIOVector *qiov)
1882{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001883 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001884
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001885 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1886}
1887
1888int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1889 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1890{
1891 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1892
1893 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1894 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001895}
1896
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001897static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1898 int64_t sector_num, int nb_sectors)
1899{
1900 BlockDriver *drv = bs->drv;
1901 QEMUIOVector qiov;
1902 struct iovec iov;
1903 int ret;
1904
Kevin Wolf621f0582012-03-20 15:12:58 +01001905 /* TODO Emulate only part of misaligned requests instead of letting block
1906 * drivers return -ENOTSUP and emulate everything */
1907
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001908 /* First try the efficient write zeroes operation */
1909 if (drv->bdrv_co_write_zeroes) {
Kevin Wolf621f0582012-03-20 15:12:58 +01001910 ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1911 if (ret != -ENOTSUP) {
1912 return ret;
1913 }
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001914 }
1915
1916 /* Fall back to bounce buffer if write zeroes is unsupported */
1917 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1918 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1919 memset(iov.iov_base, 0, iov.iov_len);
1920 qemu_iovec_init_external(&qiov, &iov, 1);
1921
1922 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1923
1924 qemu_vfree(iov.iov_base);
1925 return ret;
1926}
1927
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001928/*
1929 * Handle a write request in coroutine context
1930 */
1931static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001932 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1933 BdrvRequestFlags flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001934{
1935 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001936 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001937 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001938
1939 if (!bs->drv) {
1940 return -ENOMEDIUM;
1941 }
1942 if (bs->read_only) {
1943 return -EACCES;
1944 }
1945 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1946 return -EIO;
1947 }
1948
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001949 /* throttling disk write I/O */
1950 if (bs->io_limits_enabled) {
1951 bdrv_io_limits_intercept(bs, true, nb_sectors);
1952 }
1953
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001954 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001955 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1956 }
1957
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001958 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1959
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001960 if (flags & BDRV_REQ_ZERO_WRITE) {
1961 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1962 } else {
1963 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1964 }
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001965
Kevin Wolfda1fa912011-07-14 17:27:13 +02001966 if (bs->dirty_bitmap) {
1967 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1968 }
1969
1970 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1971 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1972 }
1973
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001974 tracked_request_end(&req);
1975
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001976 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001977}
1978
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001979int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1980 int nb_sectors, QEMUIOVector *qiov)
1981{
1982 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1983
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001984 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
1985}
1986
1987int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
1988 int64_t sector_num, int nb_sectors)
1989{
1990 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1991
1992 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
1993 BDRV_REQ_ZERO_WRITE);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001994}
1995
bellard83f64092006-08-01 16:21:11 +00001996/**
bellard83f64092006-08-01 16:21:11 +00001997 * Truncate file to 'offset' bytes (needed only for file protocols)
1998 */
1999int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2000{
2001 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002002 int ret;
bellard83f64092006-08-01 16:21:11 +00002003 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002004 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00002005 if (!drv->bdrv_truncate)
2006 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02002007 if (bs->read_only)
2008 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02002009 if (bdrv_in_use(bs))
2010 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002011 ret = drv->bdrv_truncate(bs, offset);
2012 if (ret == 0) {
2013 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02002014 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002015 }
2016 return ret;
bellard83f64092006-08-01 16:21:11 +00002017}
2018
2019/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08002020 * Length of a allocated file in bytes. Sparse files are counted by actual
2021 * allocated space. Return < 0 if error or unknown.
2022 */
2023int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2024{
2025 BlockDriver *drv = bs->drv;
2026 if (!drv) {
2027 return -ENOMEDIUM;
2028 }
2029 if (drv->bdrv_get_allocated_file_size) {
2030 return drv->bdrv_get_allocated_file_size(bs);
2031 }
2032 if (bs->file) {
2033 return bdrv_get_allocated_file_size(bs->file);
2034 }
2035 return -ENOTSUP;
2036}
2037
2038/**
bellard83f64092006-08-01 16:21:11 +00002039 * Length of a file in bytes. Return < 0 if error or unknown.
2040 */
2041int64_t bdrv_getlength(BlockDriverState *bs)
2042{
2043 BlockDriver *drv = bs->drv;
2044 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002045 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002046
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002047 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01002048 if (drv->bdrv_getlength) {
2049 return drv->bdrv_getlength(bs);
2050 }
bellard83f64092006-08-01 16:21:11 +00002051 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01002052 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00002053}
2054
bellard19cb3732006-08-19 11:45:59 +00002055/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00002056void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00002057{
bellard19cb3732006-08-19 11:45:59 +00002058 int64_t length;
2059 length = bdrv_getlength(bs);
2060 if (length < 0)
2061 length = 0;
2062 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01002063 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00002064 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00002065}
bellardcf989512004-02-16 21:56:36 +00002066
/*
 * One entry of the MSDOS partition table as laid out on disk; packed so
 * that it can be overlaid on the raw sector buffer.
 */
struct partition {
    /* 0x80 - active */
    uint8_t boot_ind;
    /* starting head */
    uint8_t head;
    /* starting sector */
    uint8_t sector;
    /* starting cylinder */
    uint8_t cyl;
    /* What partition type */
    uint8_t sys_ind;
    /* end head */
    uint8_t end_head;
    /* end sector */
    uint8_t end_sector;
    /* end cylinder */
    uint8_t end_cyl;
    /* starting sector counting from 0 */
    uint32_t start_sect;
    /* nr of sectors in partition */
    uint32_t nr_sects;
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00002079
2080/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
2081static int guess_disk_lchs(BlockDriverState *bs,
2082 int *pcylinders, int *pheads, int *psectors)
2083{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002084 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00002085 int ret, i, heads, sectors, cylinders;
2086 struct partition *p;
2087 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00002088 uint64_t nb_sectors;
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002089 bool enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00002090
2091 bdrv_get_geometry(bs, &nb_sectors);
2092
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002093 /**
2094 * The function will be invoked during startup not only in sync I/O mode,
2095 * but also in async I/O mode. So the I/O throttling function has to
2096 * be disabled temporarily here, not permanently.
2097 */
2098 enabled = bs->io_limits_enabled;
2099 bs->io_limits_enabled = false;
aliguorif3d54fc2008-11-25 21:50:24 +00002100 ret = bdrv_read(bs, 0, buf, 1);
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002101 bs->io_limits_enabled = enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00002102 if (ret < 0)
2103 return -1;
2104 /* test msdos magic */
2105 if (buf[510] != 0x55 || buf[511] != 0xaa)
2106 return -1;
2107 for(i = 0; i < 4; i++) {
2108 p = ((struct partition *)(buf + 0x1be)) + i;
2109 nr_sects = le32_to_cpu(p->nr_sects);
2110 if (nr_sects && p->end_head) {
2111 /* We make the assumption that the partition terminates on
2112 a cylinder boundary */
2113 heads = p->end_head + 1;
2114 sectors = p->end_sector & 63;
2115 if (sectors == 0)
2116 continue;
2117 cylinders = nb_sectors / (heads * sectors);
2118 if (cylinders < 1 || cylinders > 16383)
2119 continue;
2120 *pheads = heads;
2121 *psectors = sectors;
2122 *pcylinders = cylinders;
2123#if 0
2124 printf("guessed geometry: LCHS=%d %d %d\n",
2125 cylinders, heads, sectors);
2126#endif
2127 return 0;
2128 }
2129 }
2130 return -1;
2131}
2132
2133void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2134{
2135 int translation, lba_detected = 0;
2136 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00002137 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00002138
2139 /* if a geometry hint is available, use it */
2140 bdrv_get_geometry(bs, &nb_sectors);
2141 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2142 translation = bdrv_get_translation_hint(bs);
2143 if (cylinders != 0) {
2144 *pcyls = cylinders;
2145 *pheads = heads;
2146 *psecs = secs;
2147 } else {
2148 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2149 if (heads > 16) {
2150 /* if heads > 16, it means that a BIOS LBA
2151 translation was active, so the default
2152 hardware geometry is OK */
2153 lba_detected = 1;
2154 goto default_geometry;
2155 } else {
2156 *pcyls = cylinders;
2157 *pheads = heads;
2158 *psecs = secs;
2159 /* disable any translation to be in sync with
2160 the logical geometry */
2161 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2162 bdrv_set_translation_hint(bs,
2163 BIOS_ATA_TRANSLATION_NONE);
2164 }
2165 }
2166 } else {
2167 default_geometry:
2168 /* if no geometry, use a standard physical disk geometry */
2169 cylinders = nb_sectors / (16 * 63);
2170
2171 if (cylinders > 16383)
2172 cylinders = 16383;
2173 else if (cylinders < 2)
2174 cylinders = 2;
2175 *pcyls = cylinders;
2176 *pheads = 16;
2177 *psecs = 63;
2178 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2179 if ((*pcyls * *pheads) <= 131072) {
2180 bdrv_set_translation_hint(bs,
2181 BIOS_ATA_TRANSLATION_LARGE);
2182 } else {
2183 bdrv_set_translation_hint(bs,
2184 BIOS_ATA_TRANSLATION_LBA);
2185 }
2186 }
2187 }
2188 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2189 }
2190}
2191
ths5fafdf22007-09-16 21:08:06 +00002192void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002193 int cyls, int heads, int secs)
2194{
2195 bs->cyls = cyls;
2196 bs->heads = heads;
2197 bs->secs = secs;
2198}
2199
bellard46d47672004-11-16 01:45:27 +00002200void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2201{
2202 bs->translation = translation;
2203}
2204
ths5fafdf22007-09-16 21:08:06 +00002205void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002206 int *pcyls, int *pheads, int *psecs)
2207{
2208 *pcyls = bs->cyls;
2209 *pheads = bs->heads;
2210 *psecs = bs->secs;
2211}
2212
Zhi Yong Wu0563e192011-11-03 16:57:25 +08002213/* throttling disk io limits */
2214void bdrv_set_io_limits(BlockDriverState *bs,
2215 BlockIOLimit *io_limits)
2216{
2217 bs->io_limits = *io_limits;
2218 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2219}
2220
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002221/* Recognize floppy formats */
2222typedef struct FDFormat {
2223 FDriveType drive;
2224 uint8_t last_sect;
2225 uint8_t max_track;
2226 uint8_t max_head;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002227 FDriveRate rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002228} FDFormat;
2229
2230static const FDFormat fd_formats[] = {
2231 /* First entry is default format */
2232 /* 1.44 MB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002233 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2234 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2235 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2236 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2237 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2238 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2239 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2240 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002241 /* 2.88 MB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002242 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2243 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2244 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2245 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2246 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002247 /* 720 kB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002248 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2249 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2250 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2251 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2252 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2253 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002254 /* 1.2 MB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002255 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2256 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2257 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2258 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2259 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002260 /* 720 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002261 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2262 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002263 /* 360 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002264 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2265 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2266 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2267 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002268 /* 320 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002269 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2270 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002271 /* 360 kB must match 5"1/4 better than 3"1/2... */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002272 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002273 /* end */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002274 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002275};
2276
2277void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2278 int *max_track, int *last_sect,
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002279 FDriveType drive_in, FDriveType *drive,
2280 FDriveRate *rate)
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002281{
2282 const FDFormat *parse;
2283 uint64_t nb_sectors, size;
2284 int i, first_match, match;
2285
2286 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2287 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2288 /* User defined disk */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002289 *rate = FDRIVE_RATE_500K;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002290 } else {
2291 bdrv_get_geometry(bs, &nb_sectors);
2292 match = -1;
2293 first_match = -1;
2294 for (i = 0; ; i++) {
2295 parse = &fd_formats[i];
2296 if (parse->drive == FDRIVE_DRV_NONE) {
2297 break;
2298 }
2299 if (drive_in == parse->drive ||
2300 drive_in == FDRIVE_DRV_NONE) {
2301 size = (parse->max_head + 1) * parse->max_track *
2302 parse->last_sect;
2303 if (nb_sectors == size) {
2304 match = i;
2305 break;
2306 }
2307 if (first_match == -1) {
2308 first_match = i;
2309 }
2310 }
2311 }
2312 if (match == -1) {
2313 if (first_match == -1) {
2314 match = 1;
2315 } else {
2316 match = first_match;
2317 }
2318 parse = &fd_formats[match];
2319 }
2320 *nb_heads = parse->max_head + 1;
2321 *max_track = parse->max_track;
2322 *last_sect = parse->last_sect;
2323 *drive = parse->drive;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002324 *rate = parse->rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002325 }
2326}
2327
bellard46d47672004-11-16 01:45:27 +00002328int bdrv_get_translation_hint(BlockDriverState *bs)
2329{
2330 return bs->translation;
2331}
2332
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002333void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2334 BlockErrorAction on_write_error)
2335{
2336 bs->on_read_error = on_read_error;
2337 bs->on_write_error = on_write_error;
2338}
2339
2340BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2341{
2342 return is_read ? bs->on_read_error : bs->on_write_error;
2343}
2344
bellardb3380822004-03-14 21:38:54 +00002345int bdrv_is_read_only(BlockDriverState *bs)
2346{
2347 return bs->read_only;
2348}
2349
ths985a03b2007-12-24 16:10:43 +00002350int bdrv_is_sg(BlockDriverState *bs)
2351{
2352 return bs->sg;
2353}
2354
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002355int bdrv_enable_write_cache(BlockDriverState *bs)
2356{
2357 return bs->enable_write_cache;
2358}
2359
bellardea2384d2004-08-01 21:59:26 +00002360int bdrv_is_encrypted(BlockDriverState *bs)
2361{
2362 if (bs->backing_hd && bs->backing_hd->encrypted)
2363 return 1;
2364 return bs->encrypted;
2365}
2366
aliguoric0f4ce72009-03-05 23:01:01 +00002367int bdrv_key_required(BlockDriverState *bs)
2368{
2369 BlockDriverState *backing_hd = bs->backing_hd;
2370
2371 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2372 return 1;
2373 return (bs->encrypted && !bs->valid_key);
2374}
2375
bellardea2384d2004-08-01 21:59:26 +00002376int bdrv_set_key(BlockDriverState *bs, const char *key)
2377{
2378 int ret;
2379 if (bs->backing_hd && bs->backing_hd->encrypted) {
2380 ret = bdrv_set_key(bs->backing_hd, key);
2381 if (ret < 0)
2382 return ret;
2383 if (!bs->encrypted)
2384 return 0;
2385 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002386 if (!bs->encrypted) {
2387 return -EINVAL;
2388 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2389 return -ENOMEDIUM;
2390 }
aliguoric0f4ce72009-03-05 23:01:01 +00002391 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002392 if (ret < 0) {
2393 bs->valid_key = 0;
2394 } else if (!bs->valid_key) {
2395 bs->valid_key = 1;
2396 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002397 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002398 }
aliguoric0f4ce72009-03-05 23:01:01 +00002399 return ret;
bellardea2384d2004-08-01 21:59:26 +00002400}
2401
2402void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2403{
bellard19cb3732006-08-19 11:45:59 +00002404 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002405 buf[0] = '\0';
2406 } else {
2407 pstrcpy(buf, buf_size, bs->drv->format_name);
2408 }
2409}
2410
ths5fafdf22007-09-16 21:08:06 +00002411void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002412 void *opaque)
2413{
2414 BlockDriver *drv;
2415
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002416 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002417 it(opaque, drv->format_name);
2418 }
2419}
2420
bellardb3380822004-03-14 21:38:54 +00002421BlockDriverState *bdrv_find(const char *name)
2422{
2423 BlockDriverState *bs;
2424
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002425 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2426 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002427 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002428 }
bellardb3380822004-03-14 21:38:54 +00002429 }
2430 return NULL;
2431}
2432
Markus Armbruster2f399b02010-06-02 18:55:20 +02002433BlockDriverState *bdrv_next(BlockDriverState *bs)
2434{
2435 if (!bs) {
2436 return QTAILQ_FIRST(&bdrv_states);
2437 }
2438 return QTAILQ_NEXT(bs, list);
2439}
2440
aliguori51de9762009-03-05 23:00:43 +00002441void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002442{
2443 BlockDriverState *bs;
2444
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002445 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002446 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002447 }
2448}
2449
bellardea2384d2004-08-01 21:59:26 +00002450const char *bdrv_get_device_name(BlockDriverState *bs)
2451{
2452 return bs->device_name;
2453}
2454
aliguoric6ca28d2008-10-06 13:55:43 +00002455void bdrv_flush_all(void)
2456{
2457 BlockDriverState *bs;
2458
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002459 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01002460 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002461 }
aliguoric6ca28d2008-10-06 13:55:43 +00002462}
2463
Kevin Wolff2feebb2010-04-14 17:30:35 +02002464int bdrv_has_zero_init(BlockDriverState *bs)
2465{
2466 assert(bs->drv);
2467
Kevin Wolf336c1c12010-07-28 11:26:29 +02002468 if (bs->drv->bdrv_has_zero_init) {
2469 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002470 }
2471
2472 return 1;
2473}
2474
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002475typedef struct BdrvCoIsAllocatedData {
2476 BlockDriverState *bs;
2477 int64_t sector_num;
2478 int nb_sectors;
2479 int *pnum;
2480 int ret;
2481 bool done;
2482} BdrvCoIsAllocatedData;
2483
thsf58c7b32008-06-05 21:53:49 +00002484/*
2485 * Returns true iff the specified sector is present in the disk image. Drivers
2486 * not implementing the functionality are assumed to not support backing files,
2487 * hence all their sectors are reported as allocated.
2488 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002489 * If 'sector_num' is beyond the end of the disk image the return value is 0
2490 * and 'pnum' is set to 0.
2491 *
thsf58c7b32008-06-05 21:53:49 +00002492 * 'pnum' is set to the number of sectors (including and immediately following
2493 * the specified sector) that are known to be in the same
2494 * allocated/unallocated state.
2495 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002496 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2497 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002498 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002499int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2500 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002501{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002502 int64_t n;
2503
2504 if (sector_num >= bs->total_sectors) {
2505 *pnum = 0;
2506 return 0;
2507 }
2508
2509 n = bs->total_sectors - sector_num;
2510 if (n < nb_sectors) {
2511 nb_sectors = n;
2512 }
2513
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002514 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002515 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002516 return 1;
2517 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002518
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002519 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2520}
2521
2522/* Coroutine wrapper for bdrv_is_allocated() */
2523static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2524{
2525 BdrvCoIsAllocatedData *data = opaque;
2526 BlockDriverState *bs = data->bs;
2527
2528 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2529 data->pnum);
2530 data->done = true;
2531}
2532
2533/*
2534 * Synchronous wrapper around bdrv_co_is_allocated().
2535 *
2536 * See bdrv_co_is_allocated() for details.
2537 */
2538int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2539 int *pnum)
2540{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002541 Coroutine *co;
2542 BdrvCoIsAllocatedData data = {
2543 .bs = bs,
2544 .sector_num = sector_num,
2545 .nb_sectors = nb_sectors,
2546 .pnum = pnum,
2547 .done = false,
2548 };
2549
2550 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2551 qemu_coroutine_enter(co, &data);
2552 while (!data.done) {
2553 qemu_aio_wait();
2554 }
2555 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002556}
2557
Luiz Capitulinob2023812011-09-21 17:16:47 -03002558BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002559{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002560 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002561 BlockDriverState *bs;
2562
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002563 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002564 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002565
Luiz Capitulinob2023812011-09-21 17:16:47 -03002566 info->value = g_malloc0(sizeof(*info->value));
2567 info->value->device = g_strdup(bs->device_name);
2568 info->value->type = g_strdup("unknown");
2569 info->value->locked = bdrv_dev_is_medium_locked(bs);
2570 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002571
Markus Armbrustere4def802011-09-06 18:58:53 +02002572 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002573 info->value->has_tray_open = true;
2574 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002575 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002576
2577 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002578 info->value->has_io_status = true;
2579 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002580 }
2581
bellard19cb3732006-08-19 11:45:59 +00002582 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002583 info->value->has_inserted = true;
2584 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2585 info->value->inserted->file = g_strdup(bs->filename);
2586 info->value->inserted->ro = bs->read_only;
2587 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2588 info->value->inserted->encrypted = bs->encrypted;
2589 if (bs->backing_file[0]) {
2590 info->value->inserted->has_backing_file = true;
2591 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002592 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002593
2594 if (bs->io_limits_enabled) {
2595 info->value->inserted->bps =
2596 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2597 info->value->inserted->bps_rd =
2598 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2599 info->value->inserted->bps_wr =
2600 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2601 info->value->inserted->iops =
2602 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2603 info->value->inserted->iops_rd =
2604 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2605 info->value->inserted->iops_wr =
2606 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2607 }
bellardb3380822004-03-14 21:38:54 +00002608 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002609
2610 /* XXX: waiting for the qapi to support GSList */
2611 if (!cur_item) {
2612 head = cur_item = info;
2613 } else {
2614 cur_item->next = info;
2615 cur_item = info;
2616 }
bellardb3380822004-03-14 21:38:54 +00002617 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002618
Luiz Capitulinob2023812011-09-21 17:16:47 -03002619 return head;
bellardb3380822004-03-14 21:38:54 +00002620}
thsa36e69d2007-12-02 05:18:19 +00002621
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002622/* Consider exposing this as a full fledged QMP command */
2623static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002624{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002625 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002626
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002627 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002628
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002629 if (bs->device_name[0]) {
2630 s->has_device = true;
2631 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002632 }
2633
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002634 s->stats = g_malloc0(sizeof(*s->stats));
2635 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2636 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2637 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2638 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2639 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2640 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2641 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2642 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2643 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2644
Kevin Wolf294cc352010-04-28 14:34:01 +02002645 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002646 s->has_parent = true;
2647 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002648 }
2649
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002650 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002651}
2652
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002653BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002654{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002655 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002656 BlockDriverState *bs;
2657
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002658 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002659 BlockStatsList *info = g_malloc0(sizeof(*info));
2660 info->value = qmp_query_blockstat(bs, NULL);
2661
2662 /* XXX: waiting for the qapi to support GSList */
2663 if (!cur_item) {
2664 head = cur_item = info;
2665 } else {
2666 cur_item->next = info;
2667 cur_item = info;
2668 }
thsa36e69d2007-12-02 05:18:19 +00002669 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002670
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002671 return head;
thsa36e69d2007-12-02 05:18:19 +00002672}
bellardea2384d2004-08-01 21:59:26 +00002673
aliguori045df332009-03-05 23:00:48 +00002674const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2675{
2676 if (bs->backing_hd && bs->backing_hd->encrypted)
2677 return bs->backing_file;
2678 else if (bs->encrypted)
2679 return bs->filename;
2680 else
2681 return NULL;
2682}
2683
ths5fafdf22007-09-16 21:08:06 +00002684void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002685 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002686{
Kevin Wolf3574c602011-10-26 11:02:11 +02002687 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002688}
2689
ths5fafdf22007-09-16 21:08:06 +00002690int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002691 const uint8_t *buf, int nb_sectors)
2692{
2693 BlockDriver *drv = bs->drv;
2694 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002695 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002696 if (!drv->bdrv_write_compressed)
2697 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002698 if (bdrv_check_request(bs, sector_num, nb_sectors))
2699 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002700
Jan Kiszkac6d22832009-11-30 18:21:20 +01002701 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002702 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2703 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002704
bellardfaea38e2006-08-05 21:31:00 +00002705 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2706}
ths3b46e622007-09-17 08:09:54 +00002707
bellardfaea38e2006-08-05 21:31:00 +00002708int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2709{
2710 BlockDriver *drv = bs->drv;
2711 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002712 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002713 if (!drv->bdrv_get_info)
2714 return -ENOTSUP;
2715 memset(bdi, 0, sizeof(*bdi));
2716 return drv->bdrv_get_info(bs, bdi);
2717}
2718
Christoph Hellwig45566e92009-07-10 23:11:57 +02002719int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2720 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002721{
2722 BlockDriver *drv = bs->drv;
2723 if (!drv)
2724 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002725 if (drv->bdrv_save_vmstate)
2726 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2727 if (bs->file)
2728 return bdrv_save_vmstate(bs->file, buf, pos, size);
2729 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002730}
2731
Christoph Hellwig45566e92009-07-10 23:11:57 +02002732int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2733 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002734{
2735 BlockDriver *drv = bs->drv;
2736 if (!drv)
2737 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002738 if (drv->bdrv_load_vmstate)
2739 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2740 if (bs->file)
2741 return bdrv_load_vmstate(bs->file, buf, pos, size);
2742 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002743}
2744
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002745void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2746{
2747 BlockDriver *drv = bs->drv;
2748
2749 if (!drv || !drv->bdrv_debug_event) {
2750 return;
2751 }
2752
2753 return drv->bdrv_debug_event(bs, event);
2754
2755}
2756
bellardfaea38e2006-08-05 21:31:00 +00002757/**************************************************************/
2758/* handling of snapshots */
2759
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002760int bdrv_can_snapshot(BlockDriverState *bs)
2761{
2762 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002763 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002764 return 0;
2765 }
2766
2767 if (!drv->bdrv_snapshot_create) {
2768 if (bs->file != NULL) {
2769 return bdrv_can_snapshot(bs->file);
2770 }
2771 return 0;
2772 }
2773
2774 return 1;
2775}
2776
Blue Swirl199630b2010-07-25 20:49:34 +00002777int bdrv_is_snapshot(BlockDriverState *bs)
2778{
2779 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2780}
2781
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002782BlockDriverState *bdrv_snapshots(void)
2783{
2784 BlockDriverState *bs;
2785
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002786 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002787 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002788 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002789
2790 bs = NULL;
2791 while ((bs = bdrv_next(bs))) {
2792 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002793 bs_snapshots = bs;
2794 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002795 }
2796 }
2797 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002798}
2799
ths5fafdf22007-09-16 21:08:06 +00002800int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002801 QEMUSnapshotInfo *sn_info)
2802{
2803 BlockDriver *drv = bs->drv;
2804 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002805 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002806 if (drv->bdrv_snapshot_create)
2807 return drv->bdrv_snapshot_create(bs, sn_info);
2808 if (bs->file)
2809 return bdrv_snapshot_create(bs->file, sn_info);
2810 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002811}
2812
ths5fafdf22007-09-16 21:08:06 +00002813int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002814 const char *snapshot_id)
2815{
2816 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002817 int ret, open_ret;
2818
bellardfaea38e2006-08-05 21:31:00 +00002819 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002820 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002821 if (drv->bdrv_snapshot_goto)
2822 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2823
2824 if (bs->file) {
2825 drv->bdrv_close(bs);
2826 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2827 open_ret = drv->bdrv_open(bs, bs->open_flags);
2828 if (open_ret < 0) {
2829 bdrv_delete(bs->file);
2830 bs->drv = NULL;
2831 return open_ret;
2832 }
2833 return ret;
2834 }
2835
2836 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002837}
2838
2839int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2840{
2841 BlockDriver *drv = bs->drv;
2842 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002843 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002844 if (drv->bdrv_snapshot_delete)
2845 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2846 if (bs->file)
2847 return bdrv_snapshot_delete(bs->file, snapshot_id);
2848 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002849}
2850
ths5fafdf22007-09-16 21:08:06 +00002851int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002852 QEMUSnapshotInfo **psn_info)
2853{
2854 BlockDriver *drv = bs->drv;
2855 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002856 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002857 if (drv->bdrv_snapshot_list)
2858 return drv->bdrv_snapshot_list(bs, psn_info);
2859 if (bs->file)
2860 return bdrv_snapshot_list(bs->file, psn_info);
2861 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002862}
2863
edison51ef6722010-09-21 19:58:41 -07002864int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2865 const char *snapshot_name)
2866{
2867 BlockDriver *drv = bs->drv;
2868 if (!drv) {
2869 return -ENOMEDIUM;
2870 }
2871 if (!bs->read_only) {
2872 return -EINVAL;
2873 }
2874 if (drv->bdrv_snapshot_load_tmp) {
2875 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2876 }
2877 return -ENOTSUP;
2878}
2879
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00002880BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2881 const char *backing_file)
2882{
2883 if (!bs->drv) {
2884 return NULL;
2885 }
2886
2887 if (bs->backing_hd) {
2888 if (strcmp(bs->backing_file, backing_file) == 0) {
2889 return bs->backing_hd;
2890 } else {
2891 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2892 }
2893 }
2894
2895 return NULL;
2896}
2897
#define NB_SUFFIXES 4

/*
 * Format @size as a short human readable string in @buf.
 *
 * Values up to 999 (including negative ones) are printed as plain
 * integers.  Larger values are scaled by powers of 1024 and suffixed with
 * K, M, G or T: below ten units one decimal is shown ("1.5K"), otherwise
 * the value is rounded to the nearest whole unit ("10K").  Values too
 * large for the other suffixes are expressed in T.
 *
 * At most @buf_size bytes are written (snprintf semantics).  Returns @buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    base = 1024;
    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            break;
        } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /*
             * Round to nearest.  This equals (size + base / 2) / base but
             * avoids the addition, which overflows int64_t for sizes close
             * to INT64_MAX.
             */
            int64_t units = size / base + (size % base >= base / 2);

            snprintf(buf, buf_size, "%" PRId64 "%c",
                     units,
                     suffixes[i]);
            break;
        }
        base = base * 1024;
    }
    return buf;
}
2927
2928char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2929{
2930 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002931#ifdef _WIN32
2932 struct tm *ptm;
2933#else
bellardfaea38e2006-08-05 21:31:00 +00002934 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002935#endif
bellardfaea38e2006-08-05 21:31:00 +00002936 time_t ti;
2937 int64_t secs;
2938
2939 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002940 snprintf(buf, buf_size,
2941 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002942 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2943 } else {
2944 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002945#ifdef _WIN32
2946 ptm = localtime(&ti);
2947 strftime(date_buf, sizeof(date_buf),
2948 "%Y-%m-%d %H:%M:%S", ptm);
2949#else
bellardfaea38e2006-08-05 21:31:00 +00002950 localtime_r(&ti, &tm);
2951 strftime(date_buf, sizeof(date_buf),
2952 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002953#endif
bellardfaea38e2006-08-05 21:31:00 +00002954 secs = sn->vm_clock_nsec / 1000000000;
2955 snprintf(clock_buf, sizeof(clock_buf),
2956 "%02d:%02d:%02d.%03d",
2957 (int)(secs / 3600),
2958 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002959 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002960 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2961 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002962 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002963 sn->id_str, sn->name,
2964 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2965 date_buf,
2966 clock_buf);
2967 }
2968 return buf;
2969}
2970
bellard83f64092006-08-01 16:21:11 +00002971/**************************************************************/
2972/* async I/Os */
2973
aliguori3b69e4b2009-01-22 16:59:24 +00002974BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002975 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002976 BlockDriverCompletionFunc *cb, void *opaque)
2977{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002978 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2979
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002980 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002981 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002982}
2983
aliguorif141eaf2009-04-07 18:43:24 +00002984BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2985 QEMUIOVector *qiov, int nb_sectors,
2986 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002987{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002988 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2989
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002990 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002991 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002992}
2993
Kevin Wolf40b4f532009-09-09 17:53:37 +02002994
2995typedef struct MultiwriteCB {
2996 int error;
2997 int num_requests;
2998 int num_callbacks;
2999 struct {
3000 BlockDriverCompletionFunc *cb;
3001 void *opaque;
3002 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003003 } callbacks[];
3004} MultiwriteCB;
3005
3006static void multiwrite_user_cb(MultiwriteCB *mcb)
3007{
3008 int i;
3009
3010 for (i = 0; i < mcb->num_callbacks; i++) {
3011 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01003012 if (mcb->callbacks[i].free_qiov) {
3013 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
3014 }
Anthony Liguori7267c092011-08-20 22:09:37 -05003015 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003016 }
3017}
3018
3019static void multiwrite_cb(void *opaque, int ret)
3020{
3021 MultiwriteCB *mcb = opaque;
3022
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003023 trace_multiwrite_cb(mcb, ret);
3024
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02003025 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02003026 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003027 }
3028
3029 mcb->num_requests--;
3030 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02003031 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05003032 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003033 }
3034}
3035
3036static int multiwrite_req_compare(const void *a, const void *b)
3037{
Christoph Hellwig77be4362010-05-19 20:53:10 +02003038 const BlockRequest *req1 = a, *req2 = b;
3039
3040 /*
3041 * Note that we can't simply subtract req2->sector from req1->sector
3042 * here as that could overflow the return value.
3043 */
3044 if (req1->sector > req2->sector) {
3045 return 1;
3046 } else if (req1->sector < req2->sector) {
3047 return -1;
3048 } else {
3049 return 0;
3050 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02003051}
3052
3053/*
3054 * Takes a bunch of requests and tries to merge them. Returns the number of
3055 * requests that remain after merging.
3056 */
3057static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
3058 int num_reqs, MultiwriteCB *mcb)
3059{
3060 int i, outidx;
3061
3062 // Sort requests by start sector
3063 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
3064
3065 // Check if adjacent requests touch the same clusters. If so, combine them,
3066 // filling up gaps with zero sectors.
3067 outidx = 0;
3068 for (i = 1; i < num_reqs; i++) {
3069 int merge = 0;
3070 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
3071
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003072 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02003073 if (reqs[i].sector <= oldreq_last) {
3074 merge = 1;
3075 }
3076
Christoph Hellwige2a305f2010-01-26 14:49:08 +01003077 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3078 merge = 0;
3079 }
3080
Kevin Wolf40b4f532009-09-09 17:53:37 +02003081 if (merge) {
3082 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05003083 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003084 qemu_iovec_init(qiov,
3085 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
3086
3087 // Add the first request to the merged one. If the requests are
3088 // overlapping, drop the last sectors of the first request.
3089 size = (reqs[i].sector - reqs[outidx].sector) << 9;
3090 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
3091
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003092 // We should need to add any zeros between the two requests
3093 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003094
3095 // Add the second request
3096 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
3097
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02003098 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003099 reqs[outidx].qiov = qiov;
3100
3101 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3102 } else {
3103 outidx++;
3104 reqs[outidx].sector = reqs[i].sector;
3105 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3106 reqs[outidx].qiov = reqs[i].qiov;
3107 }
3108 }
3109
3110 return outidx + 1;
3111}
3112
3113/*
3114 * Submit multiple AIO write requests at once.
3115 *
3116 * On success, the function returns 0 and all requests in the reqs array have
3117 * been submitted. In error case this function returns -1, and any of the
3118 * requests may or may not be submitted yet. In particular, this means that the
3119 * callback will be called for some of the requests, for others it won't. The
3120 * caller must check the error field of the BlockRequest to wait for the right
3121 * callbacks (if error != 0, no callback will be called).
3122 *
3123 * The implementation may modify the contents of the reqs array, e.g. to merge
3124 * requests. However, the fields opaque and error are left unmodified as they
3125 * are used to signal failure for a single request to the caller.
3126 */
3127int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3128{
Kevin Wolf40b4f532009-09-09 17:53:37 +02003129 MultiwriteCB *mcb;
3130 int i;
3131
Ryan Harper301db7c2011-03-07 10:01:04 -06003132 /* don't submit writes if we don't have a medium */
3133 if (bs->drv == NULL) {
3134 for (i = 0; i < num_reqs; i++) {
3135 reqs[i].error = -ENOMEDIUM;
3136 }
3137 return -1;
3138 }
3139
Kevin Wolf40b4f532009-09-09 17:53:37 +02003140 if (num_reqs == 0) {
3141 return 0;
3142 }
3143
3144 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05003145 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003146 mcb->num_requests = 0;
3147 mcb->num_callbacks = num_reqs;
3148
3149 for (i = 0; i < num_reqs; i++) {
3150 mcb->callbacks[i].cb = reqs[i].cb;
3151 mcb->callbacks[i].opaque = reqs[i].opaque;
3152 }
3153
3154 // Check for mergable requests
3155 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3156
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003157 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3158
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01003159 /* Run the aio requests. */
3160 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003161 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01003162 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02003163 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003164 }
3165
3166 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003167}
3168
bellard83f64092006-08-01 16:21:11 +00003169void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00003170{
aliguori6bbff9a2009-03-20 18:25:59 +00003171 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00003172}
3173
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08003174/* block I/O throttling */
3175static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3176 bool is_write, double elapsed_time, uint64_t *wait)
3177{
3178 uint64_t bps_limit = 0;
3179 double bytes_limit, bytes_base, bytes_res;
3180 double slice_time, wait_time;
3181
3182 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3183 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3184 } else if (bs->io_limits.bps[is_write]) {
3185 bps_limit = bs->io_limits.bps[is_write];
3186 } else {
3187 if (wait) {
3188 *wait = 0;
3189 }
3190
3191 return false;
3192 }
3193
3194 slice_time = bs->slice_end - bs->slice_start;
3195 slice_time /= (NANOSECONDS_PER_SECOND);
3196 bytes_limit = bps_limit * slice_time;
3197 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3198 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3199 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3200 }
3201
3202 /* bytes_base: the bytes of data which have been read/written; and
3203 * it is obtained from the history statistic info.
3204 * bytes_res: the remaining bytes of data which need to be read/written.
3205 * (bytes_base + bytes_res) / bps_limit: used to calcuate
3206 * the total time for completing reading/writting all data.
3207 */
3208 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3209
3210 if (bytes_base + bytes_res <= bytes_limit) {
3211 if (wait) {
3212 *wait = 0;
3213 }
3214
3215 return false;
3216 }
3217
3218 /* Calc approx time to dispatch */
3219 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3220
3221 /* When the I/O rate at runtime exceeds the limits,
3222 * bs->slice_end need to be extended in order that the current statistic
3223 * info can be kept until the timer fire, so it is increased and tuned
3224 * based on the result of experiment.
3225 */
3226 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3227 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3228 if (wait) {
3229 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3230 }
3231
3232 return true;
3233}
3234
3235static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3236 double elapsed_time, uint64_t *wait)
3237{
3238 uint64_t iops_limit = 0;
3239 double ios_limit, ios_base;
3240 double slice_time, wait_time;
3241
3242 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3243 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3244 } else if (bs->io_limits.iops[is_write]) {
3245 iops_limit = bs->io_limits.iops[is_write];
3246 } else {
3247 if (wait) {
3248 *wait = 0;
3249 }
3250
3251 return false;
3252 }
3253
3254 slice_time = bs->slice_end - bs->slice_start;
3255 slice_time /= (NANOSECONDS_PER_SECOND);
3256 ios_limit = iops_limit * slice_time;
3257 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3258 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3259 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3260 }
3261
3262 if (ios_base + 1 <= ios_limit) {
3263 if (wait) {
3264 *wait = 0;
3265 }
3266
3267 return false;
3268 }
3269
3270 /* Calc approx time to dispatch */
3271 wait_time = (ios_base + 1) / iops_limit;
3272 if (wait_time > elapsed_time) {
3273 wait_time = wait_time - elapsed_time;
3274 } else {
3275 wait_time = 0;
3276 }
3277
3278 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3279 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3280 if (wait) {
3281 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3282 }
3283
3284 return true;
3285}
3286
3287static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3288 bool is_write, int64_t *wait)
3289{
3290 int64_t now, max_wait;
3291 uint64_t bps_wait = 0, iops_wait = 0;
3292 double elapsed_time;
3293 int bps_ret, iops_ret;
3294
3295 now = qemu_get_clock_ns(vm_clock);
3296 if ((bs->slice_start < now)
3297 && (bs->slice_end > now)) {
3298 bs->slice_end = now + bs->slice_time;
3299 } else {
3300 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3301 bs->slice_start = now;
3302 bs->slice_end = now + bs->slice_time;
3303
3304 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3305 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3306
3307 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3308 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3309 }
3310
3311 elapsed_time = now - bs->slice_start;
3312 elapsed_time /= (NANOSECONDS_PER_SECOND);
3313
3314 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3315 is_write, elapsed_time, &bps_wait);
3316 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3317 elapsed_time, &iops_wait);
3318 if (bps_ret || iops_ret) {
3319 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3320 if (wait) {
3321 *wait = max_wait;
3322 }
3323
3324 now = qemu_get_clock_ns(vm_clock);
3325 if (bs->slice_end < now + max_wait) {
3326 bs->slice_end = now + max_wait;
3327 }
3328
3329 return true;
3330 }
3331
3332 if (wait) {
3333 *wait = 0;
3334 }
3335
3336 return false;
3337}
pbrookce1a14d2006-08-07 02:38:06 +00003338
bellard83f64092006-08-01 16:21:11 +00003339/**************************************************************/
3340/* async block device emulation */
3341
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003342typedef struct BlockDriverAIOCBSync {
3343 BlockDriverAIOCB common;
3344 QEMUBH *bh;
3345 int ret;
3346 /* vector translation state */
3347 QEMUIOVector *qiov;
3348 uint8_t *bounce;
3349 int is_write;
3350} BlockDriverAIOCBSync;
3351
3352static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3353{
Kevin Wolfb666d232010-05-05 11:44:39 +02003354 BlockDriverAIOCBSync *acb =
3355 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003356 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003357 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003358 qemu_aio_release(acb);
3359}
3360
3361static AIOPool bdrv_em_aio_pool = {
3362 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3363 .cancel = bdrv_aio_cancel_em,
3364};
3365
bellard83f64092006-08-01 16:21:11 +00003366static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003367{
pbrookce1a14d2006-08-07 02:38:06 +00003368 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003369
aliguorif141eaf2009-04-07 18:43:24 +00003370 if (!acb->is_write)
3371 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003372 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003373 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003374 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003375 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003376 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003377}
bellardbeac80c2006-06-26 20:08:57 +00003378
aliguorif141eaf2009-04-07 18:43:24 +00003379static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3380 int64_t sector_num,
3381 QEMUIOVector *qiov,
3382 int nb_sectors,
3383 BlockDriverCompletionFunc *cb,
3384 void *opaque,
3385 int is_write)
3386
bellardea2384d2004-08-01 21:59:26 +00003387{
pbrookce1a14d2006-08-07 02:38:06 +00003388 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003389
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003390 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003391 acb->is_write = is_write;
3392 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003393 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003394 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003395
3396 if (is_write) {
3397 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003398 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003399 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003400 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003401 }
3402
pbrookce1a14d2006-08-07 02:38:06 +00003403 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003404
pbrookce1a14d2006-08-07 02:38:06 +00003405 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003406}
3407
aliguorif141eaf2009-04-07 18:43:24 +00003408static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3409 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003410 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003411{
aliguorif141eaf2009-04-07 18:43:24 +00003412 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003413}
3414
aliguorif141eaf2009-04-07 18:43:24 +00003415static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3416 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3417 BlockDriverCompletionFunc *cb, void *opaque)
3418{
3419 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3420}
3421
Kevin Wolf68485422011-06-30 10:05:46 +02003422
3423typedef struct BlockDriverAIOCBCoroutine {
3424 BlockDriverAIOCB common;
3425 BlockRequest req;
3426 bool is_write;
3427 QEMUBH* bh;
3428} BlockDriverAIOCBCoroutine;
3429
3430static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3431{
3432 qemu_aio_flush();
3433}
3434
3435static AIOPool bdrv_em_co_aio_pool = {
3436 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3437 .cancel = bdrv_aio_co_cancel_em,
3438};
3439
Paolo Bonzini35246a62011-10-14 10:41:29 +02003440static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003441{
3442 BlockDriverAIOCBCoroutine *acb = opaque;
3443
3444 acb->common.cb(acb->common.opaque, acb->req.error);
3445 qemu_bh_delete(acb->bh);
3446 qemu_aio_release(acb);
3447}
3448
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003449/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3450static void coroutine_fn bdrv_co_do_rw(void *opaque)
3451{
3452 BlockDriverAIOCBCoroutine *acb = opaque;
3453 BlockDriverState *bs = acb->common.bs;
3454
3455 if (!acb->is_write) {
3456 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003457 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003458 } else {
3459 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003460 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003461 }
3462
Paolo Bonzini35246a62011-10-14 10:41:29 +02003463 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003464 qemu_bh_schedule(acb->bh);
3465}
3466
Kevin Wolf68485422011-06-30 10:05:46 +02003467static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3468 int64_t sector_num,
3469 QEMUIOVector *qiov,
3470 int nb_sectors,
3471 BlockDriverCompletionFunc *cb,
3472 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003473 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003474{
3475 Coroutine *co;
3476 BlockDriverAIOCBCoroutine *acb;
3477
3478 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3479 acb->req.sector = sector_num;
3480 acb->req.nb_sectors = nb_sectors;
3481 acb->req.qiov = qiov;
3482 acb->is_write = is_write;
3483
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003484 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003485 qemu_coroutine_enter(co, acb);
3486
3487 return &acb->common;
3488}
3489
Paolo Bonzini07f07612011-10-17 12:32:12 +02003490static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003491{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003492 BlockDriverAIOCBCoroutine *acb = opaque;
3493 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003494
Paolo Bonzini07f07612011-10-17 12:32:12 +02003495 acb->req.error = bdrv_co_flush(bs);
3496 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003497 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003498}
3499
Paolo Bonzini07f07612011-10-17 12:32:12 +02003500BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003501 BlockDriverCompletionFunc *cb, void *opaque)
3502{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003503 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003504
Paolo Bonzini07f07612011-10-17 12:32:12 +02003505 Coroutine *co;
3506 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003507
Paolo Bonzini07f07612011-10-17 12:32:12 +02003508 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3509 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3510 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003511
Alexander Graf016f5cf2010-05-26 17:51:49 +02003512 return &acb->common;
3513}
3514
Paolo Bonzini4265d622011-10-17 12:32:14 +02003515static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3516{
3517 BlockDriverAIOCBCoroutine *acb = opaque;
3518 BlockDriverState *bs = acb->common.bs;
3519
3520 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3521 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3522 qemu_bh_schedule(acb->bh);
3523}
3524
3525BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3526 int64_t sector_num, int nb_sectors,
3527 BlockDriverCompletionFunc *cb, void *opaque)
3528{
3529 Coroutine *co;
3530 BlockDriverAIOCBCoroutine *acb;
3531
3532 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3533
3534 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3535 acb->req.sector = sector_num;
3536 acb->req.nb_sectors = nb_sectors;
3537 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3538 qemu_coroutine_enter(co, acb);
3539
3540 return &acb->common;
3541}
3542
bellardea2384d2004-08-01 21:59:26 +00003543void bdrv_init(void)
3544{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003545 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003546}
pbrookce1a14d2006-08-07 02:38:06 +00003547
Markus Armbrustereb852012009-10-27 18:41:44 +01003548void bdrv_init_with_whitelist(void)
3549{
3550 use_bdrv_whitelist = 1;
3551 bdrv_init();
3552}
3553
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003554void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3555 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003556{
pbrookce1a14d2006-08-07 02:38:06 +00003557 BlockDriverAIOCB *acb;
3558
aliguori6bbff9a2009-03-20 18:25:59 +00003559 if (pool->free_aiocb) {
3560 acb = pool->free_aiocb;
3561 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003562 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003563 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003564 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003565 }
3566 acb->bs = bs;
3567 acb->cb = cb;
3568 acb->opaque = opaque;
3569 return acb;
3570}
3571
3572void qemu_aio_release(void *p)
3573{
aliguori6bbff9a2009-03-20 18:25:59 +00003574 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3575 AIOPool *pool = acb->pool;
3576 acb->next = pool->free_aiocb;
3577 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003578}
bellard19cb3732006-08-19 11:45:59 +00003579
3580/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003581/* Coroutine block device emulation */
3582
3583typedef struct CoroutineIOCompletion {
3584 Coroutine *coroutine;
3585 int ret;
3586} CoroutineIOCompletion;
3587
3588static void bdrv_co_io_em_complete(void *opaque, int ret)
3589{
3590 CoroutineIOCompletion *co = opaque;
3591
3592 co->ret = ret;
3593 qemu_coroutine_enter(co->coroutine, NULL);
3594}
3595
3596static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3597 int nb_sectors, QEMUIOVector *iov,
3598 bool is_write)
3599{
3600 CoroutineIOCompletion co = {
3601 .coroutine = qemu_coroutine_self(),
3602 };
3603 BlockDriverAIOCB *acb;
3604
3605 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003606 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3607 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003608 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003609 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3610 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003611 }
3612
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003613 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003614 if (!acb) {
3615 return -EIO;
3616 }
3617 qemu_coroutine_yield();
3618
3619 return co.ret;
3620}
3621
3622static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3623 int64_t sector_num, int nb_sectors,
3624 QEMUIOVector *iov)
3625{
3626 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3627}
3628
3629static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3630 int64_t sector_num, int nb_sectors,
3631 QEMUIOVector *iov)
3632{
3633 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3634}
3635
Paolo Bonzini07f07612011-10-17 12:32:12 +02003636static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003637{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003638 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003639
Paolo Bonzini07f07612011-10-17 12:32:12 +02003640 rwco->ret = bdrv_co_flush(rwco->bs);
3641}
3642
3643int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3644{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003645 int ret;
3646
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003647 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003648 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003649 }
3650
Kevin Wolfca716362011-11-10 18:13:59 +01003651 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003652 if (bs->drv->bdrv_co_flush_to_os) {
3653 ret = bs->drv->bdrv_co_flush_to_os(bs);
3654 if (ret < 0) {
3655 return ret;
3656 }
3657 }
3658
Kevin Wolfca716362011-11-10 18:13:59 +01003659 /* But don't actually force it to the disk with cache=unsafe */
3660 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3661 return 0;
3662 }
3663
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003664 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003665 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003666 } else if (bs->drv->bdrv_aio_flush) {
3667 BlockDriverAIOCB *acb;
3668 CoroutineIOCompletion co = {
3669 .coroutine = qemu_coroutine_self(),
3670 };
3671
3672 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3673 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003674 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003675 } else {
3676 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003677 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003678 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003679 } else {
3680 /*
3681 * Some block drivers always operate in either writethrough or unsafe
3682 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3683 * know how the server works (because the behaviour is hardcoded or
3684 * depends on server-side configuration), so we can't ensure that
3685 * everything is safe on disk. Returning an error doesn't work because
3686 * that would break guests even if the server operates in writethrough
3687 * mode.
3688 *
3689 * Let's hope the user knows what he's doing.
3690 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003691 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003692 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003693 if (ret < 0) {
3694 return ret;
3695 }
3696
3697 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3698 * in the case of cache=unsafe, so there are no useless flushes.
3699 */
3700 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003701}
3702
Anthony Liguori0f154232011-11-14 15:09:45 -06003703void bdrv_invalidate_cache(BlockDriverState *bs)
3704{
3705 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3706 bs->drv->bdrv_invalidate_cache(bs);
3707 }
3708}
3709
3710void bdrv_invalidate_cache_all(void)
3711{
3712 BlockDriverState *bs;
3713
3714 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3715 bdrv_invalidate_cache(bs);
3716 }
3717}
3718
Benoît Canet07789262012-03-23 08:36:49 +01003719void bdrv_clear_incoming_migration_all(void)
3720{
3721 BlockDriverState *bs;
3722
3723 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3724 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
3725 }
3726}
3727
Paolo Bonzini07f07612011-10-17 12:32:12 +02003728int bdrv_flush(BlockDriverState *bs)
3729{
3730 Coroutine *co;
3731 RwCo rwco = {
3732 .bs = bs,
3733 .ret = NOT_DONE,
3734 };
3735
3736 if (qemu_in_coroutine()) {
3737 /* Fast-path if already in coroutine context */
3738 bdrv_flush_co_entry(&rwco);
3739 } else {
3740 co = qemu_coroutine_create(bdrv_flush_co_entry);
3741 qemu_coroutine_enter(co, &rwco);
3742 while (rwco.ret == NOT_DONE) {
3743 qemu_aio_wait();
3744 }
3745 }
3746
3747 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003748}
3749
Paolo Bonzini4265d622011-10-17 12:32:14 +02003750static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3751{
3752 RwCo *rwco = opaque;
3753
3754 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3755}
3756
3757int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3758 int nb_sectors)
3759{
3760 if (!bs->drv) {
3761 return -ENOMEDIUM;
3762 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3763 return -EIO;
3764 } else if (bs->read_only) {
3765 return -EROFS;
3766 } else if (bs->drv->bdrv_co_discard) {
3767 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3768 } else if (bs->drv->bdrv_aio_discard) {
3769 BlockDriverAIOCB *acb;
3770 CoroutineIOCompletion co = {
3771 .coroutine = qemu_coroutine_self(),
3772 };
3773
3774 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3775 bdrv_co_io_em_complete, &co);
3776 if (acb == NULL) {
3777 return -EIO;
3778 } else {
3779 qemu_coroutine_yield();
3780 return co.ret;
3781 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003782 } else {
3783 return 0;
3784 }
3785}
3786
3787int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3788{
3789 Coroutine *co;
3790 RwCo rwco = {
3791 .bs = bs,
3792 .sector_num = sector_num,
3793 .nb_sectors = nb_sectors,
3794 .ret = NOT_DONE,
3795 };
3796
3797 if (qemu_in_coroutine()) {
3798 /* Fast-path if already in coroutine context */
3799 bdrv_discard_co_entry(&rwco);
3800 } else {
3801 co = qemu_coroutine_create(bdrv_discard_co_entry);
3802 qemu_coroutine_enter(co, &rwco);
3803 while (rwco.ret == NOT_DONE) {
3804 qemu_aio_wait();
3805 }
3806 }
3807
3808 return rwco.ret;
3809}
3810
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003811/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003812/* removable device support */
3813
3814/**
3815 * Return TRUE if the media is present
3816 */
3817int bdrv_is_inserted(BlockDriverState *bs)
3818{
3819 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003820
bellard19cb3732006-08-19 11:45:59 +00003821 if (!drv)
3822 return 0;
3823 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003824 return 1;
3825 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003826}
3827
3828/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003829 * Return whether the media changed since the last call to this
3830 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003831 */
3832int bdrv_media_changed(BlockDriverState *bs)
3833{
3834 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003835
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003836 if (drv && drv->bdrv_media_changed) {
3837 return drv->bdrv_media_changed(bs);
3838 }
3839 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003840}
3841
3842/**
3843 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3844 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02003845void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003846{
3847 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003848
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003849 if (drv && drv->bdrv_eject) {
3850 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003851 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02003852
3853 if (bs->device_name[0] != '\0') {
3854 bdrv_emit_qmp_eject_event(bs, eject_flag);
3855 }
bellard19cb3732006-08-19 11:45:59 +00003856}
3857
bellard19cb3732006-08-19 11:45:59 +00003858/**
3859 * Lock or unlock the media (if it is locked, the user won't be able
3860 * to eject it manually).
3861 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003862void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003863{
3864 BlockDriver *drv = bs->drv;
3865
Markus Armbruster025e8492011-09-06 18:58:47 +02003866 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003867
Markus Armbruster025e8492011-09-06 18:58:47 +02003868 if (drv && drv->bdrv_lock_medium) {
3869 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003870 }
3871}
ths985a03b2007-12-24 16:10:43 +00003872
3873/* needed for generic scsi interface */
3874
3875int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3876{
3877 BlockDriver *drv = bs->drv;
3878
3879 if (drv && drv->bdrv_ioctl)
3880 return drv->bdrv_ioctl(bs, req, buf);
3881 return -ENOTSUP;
3882}
aliguori7d780662009-03-12 19:57:08 +00003883
aliguori221f7152009-03-28 17:28:41 +00003884BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3885 unsigned long int req, void *buf,
3886 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003887{
aliguori221f7152009-03-28 17:28:41 +00003888 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003889
aliguori221f7152009-03-28 17:28:41 +00003890 if (drv && drv->bdrv_aio_ioctl)
3891 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3892 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003893}
aliguorie268ca52009-04-22 20:20:00 +00003894
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003895void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3896{
3897 bs->buffer_alignment = align;
3898}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003899
aliguorie268ca52009-04-22 20:20:00 +00003900void *qemu_blockalign(BlockDriverState *bs, size_t size)
3901{
3902 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3903}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003904
3905void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3906{
3907 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003908
Liran Schouraaa0eb72010-01-26 10:31:48 +02003909 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003910 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003911 if (!bs->dirty_bitmap) {
3912 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
Paolo Bonzini71df14f2012-04-12 14:01:04 +02003913 BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
3914 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003915
Paolo Bonzini71df14f2012-04-12 14:01:04 +02003916 bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003917 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003918 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003919 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003920 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003921 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003922 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003923 }
3924}
3925
3926int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3927{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003928 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003929
Jan Kiszkac6d22832009-11-30 18:21:20 +01003930 if (bs->dirty_bitmap &&
3931 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003932 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3933 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003934 } else {
3935 return 0;
3936 }
3937}
3938
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003939void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3940 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003941{
3942 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3943}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003944
3945int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3946{
3947 return bs->dirty_count;
3948}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003949
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003950void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3951{
3952 assert(bs->in_use != in_use);
3953 bs->in_use = in_use;
3954}
3955
3956int bdrv_in_use(BlockDriverState *bs)
3957{
3958 return bs->in_use;
3959}
3960
Luiz Capitulino28a72822011-09-26 17:43:50 -03003961void bdrv_iostatus_enable(BlockDriverState *bs)
3962{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003963 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003964 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003965}
3966
3967/* The I/O status is only enabled if the drive explicitly
3968 * enables it _and_ the VM is configured to stop on errors */
3969bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3970{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003971 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003972 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3973 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3974 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3975}
3976
3977void bdrv_iostatus_disable(BlockDriverState *bs)
3978{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003979 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003980}
3981
3982void bdrv_iostatus_reset(BlockDriverState *bs)
3983{
3984 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003985 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003986 }
3987}
3988
3989/* XXX: Today this is set by device models because it makes the implementation
3990 quite simple. However, the block layer knows about the error, so it's
3991 possible to implement this without device models being involved */
3992void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3993{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003994 if (bdrv_iostatus_is_enabled(bs) &&
3995 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003996 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003997 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3998 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003999 }
4000}
4001
Christoph Hellwiga597e792011-08-25 08:26:01 +02004002void
4003bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
4004 enum BlockAcctType type)
4005{
4006 assert(type < BDRV_MAX_IOTYPE);
4007
4008 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02004009 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02004010 cookie->type = type;
4011}
4012
4013void
4014bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
4015{
4016 assert(cookie->type < BDRV_MAX_IOTYPE);
4017
4018 bs->nr_bytes[cookie->type] += cookie->bytes;
4019 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02004020 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02004021}
4022
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004023int bdrv_img_create(const char *filename, const char *fmt,
4024 const char *base_filename, const char *base_fmt,
4025 char *options, uint64_t img_size, int flags)
4026{
4027 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02004028 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004029 BlockDriverState *bs = NULL;
4030 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004031 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004032 int ret = 0;
4033
4034 /* Find driver and parse its options */
4035 drv = bdrv_find_format(fmt);
4036 if (!drv) {
4037 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004038 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004039 goto out;
4040 }
4041
4042 proto_drv = bdrv_find_protocol(filename);
4043 if (!proto_drv) {
4044 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004045 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004046 goto out;
4047 }
4048
4049 create_options = append_option_parameters(create_options,
4050 drv->create_options);
4051 create_options = append_option_parameters(create_options,
4052 proto_drv->create_options);
4053
4054 /* Create parameter list with default values */
4055 param = parse_option_parameters("", create_options, param);
4056
4057 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4058
4059 /* Parse -o options */
4060 if (options) {
4061 param = parse_option_parameters(options, create_options, param);
4062 if (param == NULL) {
4063 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004064 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004065 goto out;
4066 }
4067 }
4068
4069 if (base_filename) {
4070 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4071 base_filename)) {
4072 error_report("Backing file not supported for file format '%s'",
4073 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004074 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004075 goto out;
4076 }
4077 }
4078
4079 if (base_fmt) {
4080 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4081 error_report("Backing file format not supported for file "
4082 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004083 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004084 goto out;
4085 }
4086 }
4087
Jes Sorensen792da932010-12-16 13:52:17 +01004088 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4089 if (backing_file && backing_file->value.s) {
4090 if (!strcmp(filename, backing_file->value.s)) {
4091 error_report("Error: Trying to create an image with the "
4092 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004093 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01004094 goto out;
4095 }
4096 }
4097
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004098 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4099 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004100 backing_drv = bdrv_find_format(backing_fmt->value.s);
4101 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004102 error_report("Unknown backing file format '%s'",
4103 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004104 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004105 goto out;
4106 }
4107 }
4108
4109 // The size for the image must always be specified, with one exception:
4110 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02004111 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4112 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004113 if (backing_file && backing_file->value.s) {
4114 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004115 char buf[32];
Paolo Bonzini63090da2012-04-12 14:01:03 +02004116 int back_flags;
4117
4118 /* backing files always opened read-only */
4119 back_flags =
4120 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004121
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004122 bs = bdrv_new("");
4123
Paolo Bonzini63090da2012-04-12 14:01:03 +02004124 ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004125 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004126 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004127 goto out;
4128 }
4129 bdrv_get_geometry(bs, &size);
4130 size *= 512;
4131
4132 snprintf(buf, sizeof(buf), "%" PRId64, size);
4133 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4134 } else {
4135 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004136 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004137 goto out;
4138 }
4139 }
4140
4141 printf("Formatting '%s', fmt=%s ", filename, fmt);
4142 print_option_parameters(param);
4143 puts("");
4144
4145 ret = bdrv_create(drv, filename, param);
4146
4147 if (ret < 0) {
4148 if (ret == -ENOTSUP) {
4149 error_report("Formatting or formatting option not supported for "
4150 "file format '%s'", fmt);
4151 } else if (ret == -EFBIG) {
4152 error_report("The image size is too large for file format '%s'",
4153 fmt);
4154 } else {
4155 error_report("%s: error while creating %s: %s", filename, fmt,
4156 strerror(-ret));
4157 }
4158 }
4159
4160out:
4161 free_option_parameters(create_options);
4162 free_option_parameters(param);
4163
4164 if (bs) {
4165 bdrv_delete(bs);
4166 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01004167
4168 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004169}
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004170
4171void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
Stefan Hajnoczic83c66c2012-04-25 16:51:03 +01004172 int64_t speed, BlockDriverCompletionFunc *cb,
4173 void *opaque, Error **errp)
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004174{
4175 BlockJob *job;
4176
4177 if (bs->job || bdrv_in_use(bs)) {
Stefan Hajnoczifd7f8c62012-04-25 16:51:00 +01004178 error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004179 return NULL;
4180 }
4181 bdrv_set_in_use(bs, 1);
4182
4183 job = g_malloc0(job_type->instance_size);
4184 job->job_type = job_type;
4185 job->bs = bs;
4186 job->cb = cb;
4187 job->opaque = opaque;
Paolo Bonzini4513eaf2012-05-08 16:51:45 +02004188 job->busy = true;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004189 bs->job = job;
Stefan Hajnoczic83c66c2012-04-25 16:51:03 +01004190
4191 /* Only set speed when necessary to avoid NotSupported error */
4192 if (speed != 0) {
4193 Error *local_err = NULL;
4194
4195 block_job_set_speed(job, speed, &local_err);
4196 if (error_is_set(&local_err)) {
4197 bs->job = NULL;
4198 g_free(job);
4199 bdrv_set_in_use(bs, 0);
4200 error_propagate(errp, local_err);
4201 return NULL;
4202 }
4203 }
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004204 return job;
4205}
4206
4207void block_job_complete(BlockJob *job, int ret)
4208{
4209 BlockDriverState *bs = job->bs;
4210
4211 assert(bs->job == job);
4212 job->cb(job->opaque, ret);
4213 bs->job = NULL;
4214 g_free(job);
4215 bdrv_set_in_use(bs, 0);
4216}
4217
Stefan Hajnoczi882ec7c2012-04-25 16:51:02 +01004218void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004219{
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004220 Error *local_err = NULL;
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004221
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004222 if (!job->job_type->set_speed) {
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004223 error_set(errp, QERR_NOT_SUPPORTED);
4224 return;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004225 }
Stefan Hajnoczi882ec7c2012-04-25 16:51:02 +01004226 job->job_type->set_speed(job, speed, &local_err);
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004227 if (error_is_set(&local_err)) {
4228 error_propagate(errp, local_err);
4229 return;
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004230 }
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004231
Stefan Hajnoczi882ec7c2012-04-25 16:51:02 +01004232 job->speed = speed;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004233}
4234
4235void block_job_cancel(BlockJob *job)
4236{
4237 job->cancelled = true;
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004238 if (job->co && !job->busy) {
4239 qemu_coroutine_enter(job->co, NULL);
4240 }
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004241}
4242
4243bool block_job_is_cancelled(BlockJob *job)
4244{
4245 return job->cancelled;
4246}
Paolo Bonzini3e914652012-03-30 13:17:11 +02004247
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004248struct BlockCancelData {
4249 BlockJob *job;
4250 BlockDriverCompletionFunc *cb;
4251 void *opaque;
4252 bool cancelled;
4253 int ret;
4254};
4255
4256static void block_job_cancel_cb(void *opaque, int ret)
Paolo Bonzini3e914652012-03-30 13:17:11 +02004257{
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004258 struct BlockCancelData *data = opaque;
4259
4260 data->cancelled = block_job_is_cancelled(data->job);
4261 data->ret = ret;
4262 data->cb(data->opaque, ret);
4263}
4264
4265int block_job_cancel_sync(BlockJob *job)
4266{
4267 struct BlockCancelData data;
Paolo Bonzini3e914652012-03-30 13:17:11 +02004268 BlockDriverState *bs = job->bs;
4269
4270 assert(bs->job == job);
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004271
4272 /* Set up our own callback to store the result and chain to
4273 * the original callback.
4274 */
4275 data.job = job;
4276 data.cb = job->cb;
4277 data.opaque = job->opaque;
4278 data.ret = -EINPROGRESS;
4279 job->cb = block_job_cancel_cb;
4280 job->opaque = &data;
Paolo Bonzini3e914652012-03-30 13:17:11 +02004281 block_job_cancel(job);
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004282 while (data.ret == -EINPROGRESS) {
Paolo Bonzini3e914652012-03-30 13:17:11 +02004283 qemu_aio_wait();
4284 }
Paolo Bonzinifa4478d2012-05-08 16:51:46 +02004285 return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
Paolo Bonzini3e914652012-03-30 13:17:11 +02004286}
Paolo Bonzini4513eaf2012-05-08 16:51:45 +02004287
4288void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
4289{
4290 /* Check cancellation *before* setting busy = false, too! */
4291 if (!block_job_is_cancelled(job)) {
4292 job->busy = false;
4293 co_sleep_ns(clock, ns);
4294 job->busy = true;
4295 }
4296}