/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "module.h"
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
    BDRV_REQ_ZERO_WRITE   = 0x2,
} BdrvRequestFlags;

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

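/*
 * Return non-zero if @filename names a whole Windows drive or device:
 * either a bare "<letter>:" (e.g. the hypothetical "d:") or a device path
 * starting with "\\.\" or "//./".  A path such as "d:\disk.img" does not
 * match.
 */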
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end = 0;
    bs->slice_time = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}

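/* Timer callback used by I/O throttling: wake up the next request waiting
 * in throttled_reqs. */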
static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}

bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
         || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
         || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
         || io_limits->iops[BLOCK_IO_LIMIT_READ]
         || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
         || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}

static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* Requests are serviced in FIFO order to preserve each request's timing:
     * the next throttled request is not dequeued until the current one has
     * been allowed to proceed.  If the current request still exceeds the
     * limits, it is re-inserted at the head of the queue, so all requests
     * behind it remain in the throttled_reqs queue.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}
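
/* Example (hypothetical paths): "nbd:127.0.0.1:1234" starts with a protocol
 * prefix, while "/images/disk.img" does not; on Windows, "d:\disk.img" is
 * treated as a drive path rather than as a "d" protocol. */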

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}

/* If filename is absolute, just copy it to dest.  Otherwise, build a
   path to it by considering it relative to base_path.  URLs are
   supported. */
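/* Worked example (hypothetical paths): with base_path "/vm/base.qcow2" and
   filename "snap.qcow2", path_combine() produces "/vm/snap.qcow2"; an
   absolute filename such as "/tmp/other.img" is copied unchanged. */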
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QEMUOptionParameter *options;
    int ret;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    assert(cco->drv);

    cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
    };

    if (!drv->bdrv_create) {
        /* don't leak the filename duplicated into cco above */
        g_free(cco.filename);
        return -ENOTSUP;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;
    g_free(cco.filename);

    return ret;
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

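/* Probe the image format: read the first 2048 bytes of the file and pick the
 * registered driver whose bdrv_probe() reports the highest score; scsi-generic
 * devices and drives with no medium inserted are handed the "raw" driver. */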
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
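
/* For example, bdrv_parse_cache_flags("none", &flags) sets
 * BDRV_O_NOCACHE | BDRV_O_CACHE_WB, "writethrough" leaves the cache bits
 * clear (the default), and an unrecognized mode returns -1. */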

/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}

/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}

void bdrv_close(BlockDriverState *bs)
{
    bdrv_flush(bs);
    if (bs->drv) {
        if (bs->job) {
            block_job_cancel_sync(bs->job);
        }
        bdrv_drain_all();

        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk; use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example, a coroutine
 * can be arbitrarily complex and keep issuing I/O until it completes.
 * Because of this, it is not possible to provide a function that drains a
 * single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;
    bool busy;

    do {
        busy = qemu_aio_wait();

        /* FIXME: We do not have timer support here, so this is effectively
         * a busy wait.
         */
        QTAILQ_FOREACH(bs, &bdrv_states, list) {
            if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
                qemu_co_queue_restart_all(&bs->throttled_reqs);
                busy = true;
            }
        }
    } while (busy);

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}

/* make a BlockDriverState anonymous by removing it from the bdrv_states list.
   Also, clear device_name[] so that a second remove becomes a no-op */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

static void bdrv_rebind(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->bdrv_rebind) {
        bs->drv->bdrv_rebind(bs);
    }
}

/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    BlockDriverState tmp;

    /* bs_new must be anonymous */
    assert(bs_new->device_name[0] == '\0');

    tmp = *bs_new;

    /* there are some fields that need to stay on the top layer: */

    /* dev info */
    tmp.dev_ops = bs_top->dev_ops;
    tmp.dev_opaque = bs_top->dev_opaque;
    tmp.dev = bs_top->dev;
    tmp.buffer_alignment = bs_top->buffer_alignment;
    tmp.copy_on_read = bs_top->copy_on_read;

    /* i/o timing parameters */
    tmp.slice_time = bs_top->slice_time;
    tmp.slice_start = bs_top->slice_start;
    tmp.slice_end = bs_top->slice_end;
    tmp.io_limits = bs_top->io_limits;
    tmp.io_base = bs_top->io_base;
    tmp.throttled_reqs = bs_top->throttled_reqs;
    tmp.block_timer = bs_top->block_timer;
    tmp.io_limits_enabled = bs_top->io_limits_enabled;

    /* geometry */
    tmp.cyls = bs_top->cyls;
    tmp.heads = bs_top->heads;
    tmp.secs = bs_top->secs;
    tmp.translation = bs_top->translation;

    /* r/w error */
    tmp.on_read_error = bs_top->on_read_error;
    tmp.on_write_error = bs_top->on_write_error;

    /* i/o status */
    tmp.iostatus_enabled = bs_top->iostatus_enabled;
    tmp.iostatus = bs_top->iostatus;

    /* keep the same entry in bdrv_states */
    pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
    tmp.list = bs_top->list;

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    tmp.backing_hd = bs_new;
    pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
    bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));

    /* swap contents of the fixed new bs and the current top */
    *bs_new = *bs_top;
    *bs_top = tmp;

    /* device_name[] was carried over from the old bs_top.  bs_new
     * shouldn't be in bdrv_states, so we need to make device_name[]
     * reflect the anonymity of bs_new
     */
    bs_new->device_name[0] = '\0';

    /* clear the copied fields in the new backing file */
    bdrv_detach_dev(bs_new, bs_new->dev);

    qemu_co_queue_init(&bs_new->throttled_reqs);
    memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
    memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
    bdrv_iostatus_disable(bs_new);

    /* we don't use bdrv_io_limits_disable() for this, because we don't want
     * to affect or delete the block_timer, as it has been moved to bs_top */
    bs_new->io_limits_enabled = false;
    bs_new->block_timer = NULL;
    bs_new->slice_time = 0;
    bs_new->slice_start = 0;
    bs_new->slice_end = 0;

    bdrv_rebind(bs_new);
    bdrv_rebind(bs_top);
}

void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);
    assert(!bs->job);
    assert(!bs->in_use);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}

int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}

void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
                               BlockQMPEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}

static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
{
    QObject *data;

    data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
                              bdrv_get_device_name(bs), ejected);
    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);

    qobject_decref(data);
}

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
        if (tray_was_closed) {
            /* tray open */
            bdrv_emit_qmp_eject_event(bs, true);
        }
        if (load) {
            /* tray close */
            bdrv_emit_qmp_eject_event(bs, false);
        }
    }
}

bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}

void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}

bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}

static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}

bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
        return bs->dev_ops->is_medium_locked(bs->dev_opaque);
    }
    return false;
}

/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
}

#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BlockDriver *backing_drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0, rw_ret = 0;
    uint8_t *buf;
    char filename[1024];
    BlockDriverState *bs_rw, *bs_ro;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bs->backing_hd->keep_read_only) {
        return -EACCES;
    }

    if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
        return -EBUSY;
    }

    backing_drv = bs->backing_hd->drv;
    ro = bs->backing_hd->read_only;
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* re-open as RW */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_rw = bdrv_new("");
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
                           backing_drv);
        if (rw_ret < 0) {
            bdrv_delete(bs_rw);
            /* try to re-open read-only */
            bs_ro = bdrv_new("");
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                            backing_drv);
            if (ret < 0) {
                bdrv_delete(bs_ro);
                /* drive not functional anymore */
                bs->drv = NULL;
                return ret;
            }
            bs->backing_hd = bs_ro;
            return rw_ret;
        }
        bs->backing_hd = bs_rw;
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    g_free(buf);

    if (ro) {
        /* re-open as RO */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_ro = bdrv_new("");
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                        backing_drv);
        if (ret < 0) {
            bdrv_delete(bs_ro);
            /* drive not functional anymore */
            bs->drv = NULL;
            return ret;
        }
        bs->backing_hd = bs_ro;
        bs->backing_hd->keep_read_only = 0;
    }

    return ret;
}

int bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        int ret = bdrv_commit(bs);
        if (ret < 0) {
            return ret;
        }
    }
    return 0;
}

struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}

/**
 * Round a region to cluster boundaries
 */
static void round_to_clusters(BlockDriverState *bs,
                              int64_t sector_num, int nb_sectors,
                              int64_t *cluster_sector_num,
                              int *cluster_nb_sectors)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
    } else {
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
                                            nb_sectors, c);
    }
}
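
/* Worked example for round_to_clusters() (hypothetical geometry): with a
 * 64 KB cluster size (128 sectors per cluster), sector_num=130 and
 * nb_sectors=10 round to cluster_sector_num=128 and cluster_nb_sectors=128. */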

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t sector_num, int nb_sectors) {
    /* aaaa   bbbb */
    if (sector_num >= req->sector_num + req->nb_sectors) {
        return false;
    }
    /* bbbb   aaaa */
    if (req->sector_num >= sector_num + nb_sectors) {
        return false;
    }
    return true;
}

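/* Yield until no tracked request overlaps the cluster-aligned region that
 * covers [sector_num, sector_num + nb_sectors); this serializes requests that
 * touch the same cluster (e.g. the copy-on-read read and write). */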
1424static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1425 int64_t sector_num, int nb_sectors)
1426{
1427 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001428 int64_t cluster_sector_num;
1429 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001430 bool retry;
1431
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001432 /* If we touch the same cluster it counts as an overlap. This guarantees
1433 * that allocating writes will be serialized and not race with each other
1434 * for the same cluster. For example, in copy-on-read it ensures that the
1435 * CoR read and write operations are atomic and guest writes cannot
1436 * interleave between them.
1437 */
1438 round_to_clusters(bs, sector_num, nb_sectors,
1439 &cluster_sector_num, &cluster_nb_sectors);
1440
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001441 do {
1442 retry = false;
1443 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001444 if (tracked_request_overlaps(req, cluster_sector_num,
1445 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001446 /* Hitting this means there was a reentrant request, for
1447 * example, a block driver issuing nested requests. This must
1448 * never happen since it means deadlock.
1449 */
1450 assert(qemu_coroutine_self() != req->co);
1451
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001452 qemu_co_queue_wait(&req->wait_queue);
1453 retry = true;
1454 break;
1455 }
1456 }
1457 } while (retry);
1458}
1459
Kevin Wolf756e6732010-01-12 12:55:17 +01001460/*
1461 * Return values:
1462 * 0 - success
1463 * -EINVAL - backing format specified, but no file
1464 * -ENOSPC - can't update the backing file because no space is left in the
1465 * image file header
1466 * -ENOTSUP - format driver doesn't support changing the backing file
1467 */
1468int bdrv_change_backing_file(BlockDriverState *bs,
1469 const char *backing_file, const char *backing_fmt)
1470{
1471 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02001472 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01001473
Paolo Bonzini5f377792012-04-12 14:01:01 +02001474 /* Backing file format doesn't make sense without a backing file */
1475 if (backing_fmt && !backing_file) {
1476 return -EINVAL;
1477 }
1478
Kevin Wolf756e6732010-01-12 12:55:17 +01001479 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02001480 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01001481 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02001482 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01001483 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02001484
1485 if (ret == 0) {
1486 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1487 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1488 }
1489 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01001490}
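
/* Illustrative sketch (not compiled in): how a caller might point an image at
 * a new backing file and handle the return codes documented above. The file
 * name and format used here are invented for the example.
 */
#if 0
static void example_rebase(BlockDriverState *bs)
{
    int ret = bdrv_change_backing_file(bs, "new-base.qcow2", "qcow2");

    if (ret == -ENOTSUP) {
        fprintf(stderr, "format driver cannot change the backing file\n");
    } else if (ret < 0) {
        fprintf(stderr, "updating backing file failed: %d\n", ret);
    }
}
#endif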
1491
aliguori71d07702009-03-03 17:37:16 +00001492static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1493 size_t size)
1494{
1495 int64_t len;
1496
1497 if (!bdrv_is_inserted(bs))
1498 return -ENOMEDIUM;
1499
1500 if (bs->growable)
1501 return 0;
1502
1503 len = bdrv_getlength(bs);
1504
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001505 if (offset < 0)
1506 return -EIO;
1507
1508 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001509 return -EIO;
1510
1511 return 0;
1512}
1513
1514static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1515 int nb_sectors)
1516{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001517 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1518 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001519}
1520
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001521typedef struct RwCo {
1522 BlockDriverState *bs;
1523 int64_t sector_num;
1524 int nb_sectors;
1525 QEMUIOVector *qiov;
1526 bool is_write;
1527 int ret;
1528} RwCo;
1529
1530static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1531{
1532 RwCo *rwco = opaque;
1533
1534 if (!rwco->is_write) {
1535 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001536 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001537 } else {
1538 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001539 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001540 }
1541}
1542
1543/*
1544 * Process a synchronous request using coroutines
1545 */
1546static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1547 int nb_sectors, bool is_write)
1548{
1549 QEMUIOVector qiov;
1550 struct iovec iov = {
1551 .iov_base = (void *)buf,
1552 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1553 };
1554 Coroutine *co;
1555 RwCo rwco = {
1556 .bs = bs,
1557 .sector_num = sector_num,
1558 .nb_sectors = nb_sectors,
1559 .qiov = &qiov,
1560 .is_write = is_write,
1561 .ret = NOT_DONE,
1562 };
1563
1564 qemu_iovec_init_external(&qiov, &iov, 1);
1565
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001566 /**
 1567 * In a synchronous call context the vcpu is blocked, so this throttling
 1568 * timer will not fire; therefore the I/O throttling function has to be
 1569 * disabled here if it has been enabled.
1570 */
1571 if (bs->io_limits_enabled) {
1572 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1573 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1574 bdrv_io_limits_disable(bs);
1575 }
1576
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001577 if (qemu_in_coroutine()) {
1578 /* Fast-path if already in coroutine context */
1579 bdrv_rw_co_entry(&rwco);
1580 } else {
1581 co = qemu_coroutine_create(bdrv_rw_co_entry);
1582 qemu_coroutine_enter(co, &rwco);
1583 while (rwco.ret == NOT_DONE) {
1584 qemu_aio_wait();
1585 }
1586 }
1587 return rwco.ret;
1588}
1589
bellard19cb3732006-08-19 11:45:59 +00001590/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001591int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001592 uint8_t *buf, int nb_sectors)
1593{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001594 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001595}
1596
Paolo Bonzini71df14f2012-04-12 14:01:04 +02001597#define BITS_PER_LONG (sizeof(unsigned long) * 8)
1598
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001599static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001600 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001601{
1602 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001603 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001604
Jan Kiszka6ea44302009-11-30 18:21:19 +01001605 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001606 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001607
1608 for (; start <= end; start++) {
Paolo Bonzini71df14f2012-04-12 14:01:04 +02001609 idx = start / BITS_PER_LONG;
1610 bit = start % BITS_PER_LONG;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001611 val = bs->dirty_bitmap[idx];
1612 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001613 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001614 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001615 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001616 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001617 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001618 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001619 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001620 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001621 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001622 }
1623 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001624 }
1625}
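
/* Worked example (illustrative): assuming BDRV_SECTORS_PER_DIRTY_CHUNK is
 * 2048 (1 MB worth of 512-byte sectors) and 64-bit longs, marking sectors
 * [4096, 6144) dirty touches exactly one chunk:
 *
 *   start = 4096 / 2048 = 2,  end = (4096 + 2048 - 1) / 2048 = 2
 *   idx   = 2 / BITS_PER_LONG = 0,  bit = 2 % BITS_PER_LONG = 2
 *
 * so bit 2 of dirty_bitmap[0] is set and dirty_count is incremented once,
 * provided the bit was previously clear. The chunk size above is an
 * assumption made for the example.
 */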
1626
ths5fafdf22007-09-16 21:08:06 +00001627/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001628 -EIO generic I/O error (may happen for all errors)
1629 -ENOMEDIUM No media inserted.
1630 -EINVAL Invalid sector number or nb_sectors
1631 -EACCES Trying to write a read-only device
1632*/
ths5fafdf22007-09-16 21:08:06 +00001633int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001634 const uint8_t *buf, int nb_sectors)
1635{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001636 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001637}
1638
aliguorieda578e2009-03-12 19:57:16 +00001639int bdrv_pread(BlockDriverState *bs, int64_t offset,
1640 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001641{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001642 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001643 int len, nb_sectors, count;
1644 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001645 int ret;
bellard83f64092006-08-01 16:21:11 +00001646
1647 count = count1;
1648 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001649 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001650 if (len > count)
1651 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001652 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001653 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001654 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1655 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001656 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001657 count -= len;
1658 if (count == 0)
1659 return count1;
1660 sector_num++;
1661 buf += len;
1662 }
1663
1664 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001665 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001666 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001667 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1668 return ret;
bellard83f64092006-08-01 16:21:11 +00001669 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001670 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001671 buf += len;
1672 count -= len;
1673 }
1674
1675 /* add data from the last sector */
1676 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001677 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1678 return ret;
bellard83f64092006-08-01 16:21:11 +00001679 memcpy(buf, tmp_buf, count);
1680 }
1681 return count1;
1682}
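
/* Worked example (illustrative): bdrv_pread(bs, 1000, buf, 2000) with 512-byte
 * sectors is split into three phases:
 *
 *   head: len = (512 - 1000) & 511 = 24   -> read sector 1, copy 24 bytes
 *         starting at in-sector offset 488
 *   body: nb_sectors = 1976 >> 9 = 3      -> read sectors 2..4 (1536 bytes)
 *         straight into the caller's buffer
 *   tail: 440 bytes remain                -> read sector 5, copy its first
 *         440 bytes
 *
 * bdrv_pwrite() below follows the same pattern, using read-modify-write for
 * the partial head and tail sectors.
 */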
1683
aliguorieda578e2009-03-12 19:57:16 +00001684int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1685 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001686{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001687 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001688 int len, nb_sectors, count;
1689 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001690 int ret;
bellard83f64092006-08-01 16:21:11 +00001691
1692 count = count1;
1693 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001694 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001695 if (len > count)
1696 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001697 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001698 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001699 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1700 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001701 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001702 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1703 return ret;
bellard83f64092006-08-01 16:21:11 +00001704 count -= len;
1705 if (count == 0)
1706 return count1;
1707 sector_num++;
1708 buf += len;
1709 }
1710
1711 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001712 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001713 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001714 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1715 return ret;
bellard83f64092006-08-01 16:21:11 +00001716 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001717 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001718 buf += len;
1719 count -= len;
1720 }
1721
1722 /* add data from the last sector */
1723 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001724 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1725 return ret;
bellard83f64092006-08-01 16:21:11 +00001726 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001727 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1728 return ret;
bellard83f64092006-08-01 16:21:11 +00001729 }
1730 return count1;
1731}
bellard83f64092006-08-01 16:21:11 +00001732
Kevin Wolff08145f2010-06-16 16:38:15 +02001733/*
1734 * Writes to the file and ensures that no writes are reordered across this
1735 * request (acts as a barrier)
1736 *
1737 * Returns 0 on success, -errno in error cases.
1738 */
1739int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1740 const void *buf, int count)
1741{
1742 int ret;
1743
1744 ret = bdrv_pwrite(bs, offset, buf, count);
1745 if (ret < 0) {
1746 return ret;
1747 }
1748
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001749 /* No flush needed for cache modes that use O_DSYNC */
1750 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001751 bdrv_flush(bs);
1752 }
1753
1754 return 0;
1755}
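
/* Illustrative sketch (not compiled in): a typical use of bdrv_pwrite_sync()
 * is updating an image metadata field that later writes must not overtake.
 * The offset and field here are invented for the example.
 */
#if 0
static int example_update_header_field(BlockDriverState *file, uint64_t value)
{
    uint64_t be_value = cpu_to_be64(value);

    /* 24 is an arbitrary example offset inside an imaginary header */
    return bdrv_pwrite_sync(file, 24, &be_value, sizeof(be_value));
}
#endif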
1756
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001757static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001758 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1759{
1760 /* Perform I/O through a temporary buffer so that users who scribble over
1761 * their read buffer while the operation is in progress do not end up
1762 * modifying the image file. This is critical for zero-copy guest I/O
1763 * where anything might happen inside guest memory.
1764 */
1765 void *bounce_buffer;
1766
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001767 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00001768 struct iovec iov;
1769 QEMUIOVector bounce_qiov;
1770 int64_t cluster_sector_num;
1771 int cluster_nb_sectors;
1772 size_t skip_bytes;
1773 int ret;
1774
 1775 /* Cover the entire cluster so no additional backing file I/O is required
 1776 * when allocating the cluster in the image file.
1777 */
1778 round_to_clusters(bs, sector_num, nb_sectors,
1779 &cluster_sector_num, &cluster_nb_sectors);
1780
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001781 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1782 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001783
1784 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1785 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1786 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1787
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001788 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1789 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001790 if (ret < 0) {
1791 goto err;
1792 }
1793
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001794 if (drv->bdrv_co_write_zeroes &&
1795 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01001796 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
1797 cluster_nb_sectors);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001798 } else {
1799 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001800 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001801 }
1802
Stefan Hajnocziab185922011-11-17 13:40:31 +00001803 if (ret < 0) {
1804 /* It might be okay to ignore write errors for guest requests. If this
1805 * is a deliberate copy-on-read then we don't want to ignore the error.
1806 * Simply report it in all cases.
1807 */
1808 goto err;
1809 }
1810
1811 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1812 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1813 nb_sectors * BDRV_SECTOR_SIZE);
1814
1815err:
1816 qemu_vfree(bounce_buffer);
1817 return ret;
1818}
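
/* Illustrative walk-through (sizes are examples only): with 64 KB clusters, a
 * 4 KB copy-on-read request at guest offset 66 KB expands to the cluster
 * [64 KB, 128 KB). The whole cluster is read into the bounce buffer (pulling
 * the data up from the backing file), written back into the image (or
 * zero-written if it turns out to be all zeroes), and only the 4 KB the guest
 * asked for is copied out to its qiov, skipping the first 2 KB of the bounce
 * buffer.
 */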
1819
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001820/*
1821 * Handle a read request in coroutine context
1822 */
1823static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001824 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1825 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001826{
1827 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001828 BdrvTrackedRequest req;
1829 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001830
Kevin Wolfda1fa912011-07-14 17:27:13 +02001831 if (!drv) {
1832 return -ENOMEDIUM;
1833 }
1834 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1835 return -EIO;
1836 }
1837
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001838 /* throttling disk read I/O */
1839 if (bs->io_limits_enabled) {
1840 bdrv_io_limits_intercept(bs, false, nb_sectors);
1841 }
1842
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001843 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001844 flags |= BDRV_REQ_COPY_ON_READ;
1845 }
1846 if (flags & BDRV_REQ_COPY_ON_READ) {
1847 bs->copy_on_read_in_flight++;
1848 }
1849
1850 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001851 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1852 }
1853
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001854 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001855
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001856 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00001857 int pnum;
1858
1859 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1860 if (ret < 0) {
1861 goto out;
1862 }
1863
1864 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001865 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001866 goto out;
1867 }
1868 }
1869
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001870 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001871
1872out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001873 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001874
1875 if (flags & BDRV_REQ_COPY_ON_READ) {
1876 bs->copy_on_read_in_flight--;
1877 }
1878
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001879 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001880}
1881
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001882int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001883 int nb_sectors, QEMUIOVector *qiov)
1884{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001885 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001886
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001887 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1888}
1889
1890int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1891 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1892{
1893 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1894
1895 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1896 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001897}
1898
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001899static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1900 int64_t sector_num, int nb_sectors)
1901{
1902 BlockDriver *drv = bs->drv;
1903 QEMUIOVector qiov;
1904 struct iovec iov;
1905 int ret;
1906
Kevin Wolf621f0582012-03-20 15:12:58 +01001907 /* TODO Emulate only part of misaligned requests instead of letting block
1908 * drivers return -ENOTSUP and emulate everything */
1909
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001910 /* First try the efficient write zeroes operation */
1911 if (drv->bdrv_co_write_zeroes) {
Kevin Wolf621f0582012-03-20 15:12:58 +01001912 ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1913 if (ret != -ENOTSUP) {
1914 return ret;
1915 }
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001916 }
1917
1918 /* Fall back to bounce buffer if write zeroes is unsupported */
1919 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1920 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1921 memset(iov.iov_base, 0, iov.iov_len);
1922 qemu_iovec_init_external(&qiov, &iov, 1);
1923
1924 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1925
1926 qemu_vfree(iov.iov_base);
1927 return ret;
1928}
1929
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001930/*
1931 * Handle a write request in coroutine context
1932 */
1933static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001934 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1935 BdrvRequestFlags flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001936{
1937 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001938 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001939 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001940
1941 if (!bs->drv) {
1942 return -ENOMEDIUM;
1943 }
1944 if (bs->read_only) {
1945 return -EACCES;
1946 }
1947 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1948 return -EIO;
1949 }
1950
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001951 /* throttling disk write I/O */
1952 if (bs->io_limits_enabled) {
1953 bdrv_io_limits_intercept(bs, true, nb_sectors);
1954 }
1955
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001956 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001957 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1958 }
1959
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001960 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1961
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001962 if (flags & BDRV_REQ_ZERO_WRITE) {
1963 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1964 } else {
1965 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1966 }
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001967
Kevin Wolfda1fa912011-07-14 17:27:13 +02001968 if (bs->dirty_bitmap) {
1969 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1970 }
1971
1972 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1973 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1974 }
1975
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001976 tracked_request_end(&req);
1977
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001978 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001979}
1980
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001981int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1982 int nb_sectors, QEMUIOVector *qiov)
1983{
1984 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1985
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001986 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
1987}
1988
1989int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
1990 int64_t sector_num, int nb_sectors)
1991{
1992 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1993
1994 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
1995 BDRV_REQ_ZERO_WRITE);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001996}
1997
bellard83f64092006-08-01 16:21:11 +00001998/**
bellard83f64092006-08-01 16:21:11 +00001999 * Truncate file to 'offset' bytes (needed only for file protocols)
2000 */
2001int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2002{
2003 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002004 int ret;
bellard83f64092006-08-01 16:21:11 +00002005 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002006 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00002007 if (!drv->bdrv_truncate)
2008 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02002009 if (bs->read_only)
2010 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02002011 if (bdrv_in_use(bs))
2012 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002013 ret = drv->bdrv_truncate(bs, offset);
2014 if (ret == 0) {
2015 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02002016 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002017 }
2018 return ret;
bellard83f64092006-08-01 16:21:11 +00002019}
2020
2021/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08002022 * Length of an allocated file in bytes. Sparse files are counted by actual
2023 * allocated space. Return < 0 if error or unknown.
2024 */
2025int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2026{
2027 BlockDriver *drv = bs->drv;
2028 if (!drv) {
2029 return -ENOMEDIUM;
2030 }
2031 if (drv->bdrv_get_allocated_file_size) {
2032 return drv->bdrv_get_allocated_file_size(bs);
2033 }
2034 if (bs->file) {
2035 return bdrv_get_allocated_file_size(bs->file);
2036 }
2037 return -ENOTSUP;
2038}
2039
2040/**
bellard83f64092006-08-01 16:21:11 +00002041 * Length of a file in bytes. Return < 0 if error or unknown.
2042 */
2043int64_t bdrv_getlength(BlockDriverState *bs)
2044{
2045 BlockDriver *drv = bs->drv;
2046 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002047 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002048
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002049 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01002050 if (drv->bdrv_getlength) {
2051 return drv->bdrv_getlength(bs);
2052 }
bellard83f64092006-08-01 16:21:11 +00002053 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01002054 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00002055}
2056
bellard19cb3732006-08-19 11:45:59 +00002057/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00002058void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00002059{
bellard19cb3732006-08-19 11:45:59 +00002060 int64_t length;
2061 length = bdrv_getlength(bs);
2062 if (length < 0)
2063 length = 0;
2064 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01002065 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00002066 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00002067}
bellardcf989512004-02-16 21:56:36 +00002068
aliguorif3d54fc2008-11-25 21:50:24 +00002069struct partition {
2070 uint8_t boot_ind; /* 0x80 - active */
2071 uint8_t head; /* starting head */
2072 uint8_t sector; /* starting sector */
2073 uint8_t cyl; /* starting cylinder */
2074 uint8_t sys_ind; /* What partition type */
2075 uint8_t end_head; /* end head */
2076 uint8_t end_sector; /* end sector */
2077 uint8_t end_cyl; /* end cylinder */
2078 uint32_t start_sect; /* starting sector counting from 0 */
2079 uint32_t nr_sects; /* nr of sectors in partition */
Stefan Weil541dc0d2011-08-31 12:38:01 +02002080} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00002081
2082/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
2083static int guess_disk_lchs(BlockDriverState *bs,
2084 int *pcylinders, int *pheads, int *psectors)
2085{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002086 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00002087 int ret, i, heads, sectors, cylinders;
2088 struct partition *p;
2089 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00002090 uint64_t nb_sectors;
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002091 bool enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00002092
2093 bdrv_get_geometry(bs, &nb_sectors);
2094
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002095 /**
 2096 * This function is invoked during startup not only in sync I/O mode but
 2097 * also in async I/O mode, so the I/O throttling function has to be
 2098 * disabled temporarily here, not permanently.
2099 */
2100 enabled = bs->io_limits_enabled;
2101 bs->io_limits_enabled = false;
aliguorif3d54fc2008-11-25 21:50:24 +00002102 ret = bdrv_read(bs, 0, buf, 1);
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002103 bs->io_limits_enabled = enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00002104 if (ret < 0)
2105 return -1;
2106 /* test msdos magic */
2107 if (buf[510] != 0x55 || buf[511] != 0xaa)
2108 return -1;
2109 for(i = 0; i < 4; i++) {
2110 p = ((struct partition *)(buf + 0x1be)) + i;
2111 nr_sects = le32_to_cpu(p->nr_sects);
2112 if (nr_sects && p->end_head) {
2113 /* We make the assumption that the partition terminates on
2114 a cylinder boundary */
2115 heads = p->end_head + 1;
2116 sectors = p->end_sector & 63;
2117 if (sectors == 0)
2118 continue;
2119 cylinders = nb_sectors / (heads * sectors);
2120 if (cylinders < 1 || cylinders > 16383)
2121 continue;
2122 *pheads = heads;
2123 *psectors = sectors;
2124 *pcylinders = cylinders;
2125#if 0
2126 printf("guessed geometry: LCHS=%d %d %d\n",
2127 cylinders, heads, sectors);
2128#endif
2129 return 0;
2130 }
2131 }
2132 return -1;
2133}
2134
2135void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2136{
2137 int translation, lba_detected = 0;
2138 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00002139 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00002140
2141 /* if a geometry hint is available, use it */
2142 bdrv_get_geometry(bs, &nb_sectors);
2143 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2144 translation = bdrv_get_translation_hint(bs);
2145 if (cylinders != 0) {
2146 *pcyls = cylinders;
2147 *pheads = heads;
2148 *psecs = secs;
2149 } else {
2150 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2151 if (heads > 16) {
2152 /* if heads > 16, it means that a BIOS LBA
2153 translation was active, so the default
2154 hardware geometry is OK */
2155 lba_detected = 1;
2156 goto default_geometry;
2157 } else {
2158 *pcyls = cylinders;
2159 *pheads = heads;
2160 *psecs = secs;
2161 /* disable any translation to be in sync with
2162 the logical geometry */
2163 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2164 bdrv_set_translation_hint(bs,
2165 BIOS_ATA_TRANSLATION_NONE);
2166 }
2167 }
2168 } else {
2169 default_geometry:
2170 /* if no geometry, use a standard physical disk geometry */
2171 cylinders = nb_sectors / (16 * 63);
2172
2173 if (cylinders > 16383)
2174 cylinders = 16383;
2175 else if (cylinders < 2)
2176 cylinders = 2;
2177 *pcyls = cylinders;
2178 *pheads = 16;
2179 *psecs = 63;
2180 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2181 if ((*pcyls * *pheads) <= 131072) {
2182 bdrv_set_translation_hint(bs,
2183 BIOS_ATA_TRANSLATION_LARGE);
2184 } else {
2185 bdrv_set_translation_hint(bs,
2186 BIOS_ATA_TRANSLATION_LBA);
2187 }
2188 }
2189 }
2190 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2191 }
2192}
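
/* Worked example (illustrative): an 8 GB image with no geometry hint and no
 * usable partition table has nb_sectors = 16777216, so the default branch
 * computes:
 *
 *   cylinders = 16777216 / (16 * 63) = 16644  -> clamped to 16383
 *   geometry  = 16383 cylinders, 16 heads, 63 sectors
 *
 * and, if the default path was entered because more than 16 heads were
 * detected (lba_detected) with translation still AUTO, 16383 * 16 > 131072
 * selects BIOS_ATA_TRANSLATION_LBA.
 */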
2193
ths5fafdf22007-09-16 21:08:06 +00002194void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002195 int cyls, int heads, int secs)
2196{
2197 bs->cyls = cyls;
2198 bs->heads = heads;
2199 bs->secs = secs;
2200}
2201
bellard46d47672004-11-16 01:45:27 +00002202void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2203{
2204 bs->translation = translation;
2205}
2206
ths5fafdf22007-09-16 21:08:06 +00002207void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002208 int *pcyls, int *pheads, int *psecs)
2209{
2210 *pcyls = bs->cyls;
2211 *pheads = bs->heads;
2212 *psecs = bs->secs;
2213}
2214
Zhi Yong Wu0563e192011-11-03 16:57:25 +08002215/* throttling disk io limits */
2216void bdrv_set_io_limits(BlockDriverState *bs,
2217 BlockIOLimit *io_limits)
2218{
2219 bs->io_limits = *io_limits;
2220 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2221}
2222
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002223/* Recognize floppy formats */
2224typedef struct FDFormat {
2225 FDriveType drive;
2226 uint8_t last_sect;
2227 uint8_t max_track;
2228 uint8_t max_head;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002229 FDriveRate rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002230} FDFormat;
2231
2232static const FDFormat fd_formats[] = {
2233 /* First entry is default format */
2234 /* 1.44 MB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002235 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2236 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2237 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2238 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2239 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2240 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2241 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2242 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002243 /* 2.88 MB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002244 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2245 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2246 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2247 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2248 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002249 /* 720 kB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002250 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2251 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2252 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2253 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2254 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2255 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002256 /* 1.2 MB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002257 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2258 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2259 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2260 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2261 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002262 /* 720 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002263 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2264 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002265 /* 360 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002266 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2267 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2268 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2269 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002270 /* 320 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002271 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2272 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002273 /* 360 kB must match 5"1/4 better than 3"1/2... */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002274 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002275 /* end */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002276 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002277};
2278
2279void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2280 int *max_track, int *last_sect,
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002281 FDriveType drive_in, FDriveType *drive,
2282 FDriveRate *rate)
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002283{
2284 const FDFormat *parse;
2285 uint64_t nb_sectors, size;
2286 int i, first_match, match;
2287
2288 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2289 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2290 /* User defined disk */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002291 *rate = FDRIVE_RATE_500K;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002292 } else {
2293 bdrv_get_geometry(bs, &nb_sectors);
2294 match = -1;
2295 first_match = -1;
2296 for (i = 0; ; i++) {
2297 parse = &fd_formats[i];
2298 if (parse->drive == FDRIVE_DRV_NONE) {
2299 break;
2300 }
2301 if (drive_in == parse->drive ||
2302 drive_in == FDRIVE_DRV_NONE) {
2303 size = (parse->max_head + 1) * parse->max_track *
2304 parse->last_sect;
2305 if (nb_sectors == size) {
2306 match = i;
2307 break;
2308 }
2309 if (first_match == -1) {
2310 first_match = i;
2311 }
2312 }
2313 }
2314 if (match == -1) {
2315 if (first_match == -1) {
2316 match = 1;
2317 } else {
2318 match = first_match;
2319 }
2320 parse = &fd_formats[match];
2321 }
2322 *nb_heads = parse->max_head + 1;
2323 *max_track = parse->max_track;
2324 *last_sect = parse->last_sect;
2325 *drive = parse->drive;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002326 *rate = parse->rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002327 }
2328}
2329
bellard46d47672004-11-16 01:45:27 +00002330int bdrv_get_translation_hint(BlockDriverState *bs)
2331{
2332 return bs->translation;
2333}
2334
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002335void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2336 BlockErrorAction on_write_error)
2337{
2338 bs->on_read_error = on_read_error;
2339 bs->on_write_error = on_write_error;
2340}
2341
2342BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2343{
2344 return is_read ? bs->on_read_error : bs->on_write_error;
2345}
2346
bellardb3380822004-03-14 21:38:54 +00002347int bdrv_is_read_only(BlockDriverState *bs)
2348{
2349 return bs->read_only;
2350}
2351
ths985a03b2007-12-24 16:10:43 +00002352int bdrv_is_sg(BlockDriverState *bs)
2353{
2354 return bs->sg;
2355}
2356
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002357int bdrv_enable_write_cache(BlockDriverState *bs)
2358{
2359 return bs->enable_write_cache;
2360}
2361
bellardea2384d2004-08-01 21:59:26 +00002362int bdrv_is_encrypted(BlockDriverState *bs)
2363{
2364 if (bs->backing_hd && bs->backing_hd->encrypted)
2365 return 1;
2366 return bs->encrypted;
2367}
2368
aliguoric0f4ce72009-03-05 23:01:01 +00002369int bdrv_key_required(BlockDriverState *bs)
2370{
2371 BlockDriverState *backing_hd = bs->backing_hd;
2372
2373 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2374 return 1;
2375 return (bs->encrypted && !bs->valid_key);
2376}
2377
bellardea2384d2004-08-01 21:59:26 +00002378int bdrv_set_key(BlockDriverState *bs, const char *key)
2379{
2380 int ret;
2381 if (bs->backing_hd && bs->backing_hd->encrypted) {
2382 ret = bdrv_set_key(bs->backing_hd, key);
2383 if (ret < 0)
2384 return ret;
2385 if (!bs->encrypted)
2386 return 0;
2387 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002388 if (!bs->encrypted) {
2389 return -EINVAL;
2390 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2391 return -ENOMEDIUM;
2392 }
aliguoric0f4ce72009-03-05 23:01:01 +00002393 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002394 if (ret < 0) {
2395 bs->valid_key = 0;
2396 } else if (!bs->valid_key) {
2397 bs->valid_key = 1;
2398 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002399 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002400 }
aliguoric0f4ce72009-03-05 23:01:01 +00002401 return ret;
bellardea2384d2004-08-01 21:59:26 +00002402}
2403
2404void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2405{
bellard19cb3732006-08-19 11:45:59 +00002406 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002407 buf[0] = '\0';
2408 } else {
2409 pstrcpy(buf, buf_size, bs->drv->format_name);
2410 }
2411}
2412
ths5fafdf22007-09-16 21:08:06 +00002413void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002414 void *opaque)
2415{
2416 BlockDriver *drv;
2417
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002418 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002419 it(opaque, drv->format_name);
2420 }
2421}
2422
bellardb3380822004-03-14 21:38:54 +00002423BlockDriverState *bdrv_find(const char *name)
2424{
2425 BlockDriverState *bs;
2426
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002427 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2428 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002429 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002430 }
bellardb3380822004-03-14 21:38:54 +00002431 }
2432 return NULL;
2433}
2434
Markus Armbruster2f399b02010-06-02 18:55:20 +02002435BlockDriverState *bdrv_next(BlockDriverState *bs)
2436{
2437 if (!bs) {
2438 return QTAILQ_FIRST(&bdrv_states);
2439 }
2440 return QTAILQ_NEXT(bs, list);
2441}
2442
aliguori51de9762009-03-05 23:00:43 +00002443void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002444{
2445 BlockDriverState *bs;
2446
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002447 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002448 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002449 }
2450}
2451
bellardea2384d2004-08-01 21:59:26 +00002452const char *bdrv_get_device_name(BlockDriverState *bs)
2453{
2454 return bs->device_name;
2455}
2456
aliguoric6ca28d2008-10-06 13:55:43 +00002457void bdrv_flush_all(void)
2458{
2459 BlockDriverState *bs;
2460
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002461 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01002462 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002463 }
aliguoric6ca28d2008-10-06 13:55:43 +00002464}
2465
Kevin Wolff2feebb2010-04-14 17:30:35 +02002466int bdrv_has_zero_init(BlockDriverState *bs)
2467{
2468 assert(bs->drv);
2469
Kevin Wolf336c1c12010-07-28 11:26:29 +02002470 if (bs->drv->bdrv_has_zero_init) {
2471 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002472 }
2473
2474 return 1;
2475}
2476
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002477typedef struct BdrvCoIsAllocatedData {
2478 BlockDriverState *bs;
2479 int64_t sector_num;
2480 int nb_sectors;
2481 int *pnum;
2482 int ret;
2483 bool done;
2484} BdrvCoIsAllocatedData;
2485
thsf58c7b32008-06-05 21:53:49 +00002486/*
2487 * Returns true iff the specified sector is present in the disk image. Drivers
2488 * not implementing the functionality are assumed to not support backing files,
2489 * hence all their sectors are reported as allocated.
2490 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002491 * If 'sector_num' is beyond the end of the disk image the return value is 0
2492 * and 'pnum' is set to 0.
2493 *
thsf58c7b32008-06-05 21:53:49 +00002494 * 'pnum' is set to the number of sectors (including and immediately following
2495 * the specified sector) that are known to be in the same
2496 * allocated/unallocated state.
2497 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002498 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2499 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002500 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002501int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2502 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002503{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002504 int64_t n;
2505
2506 if (sector_num >= bs->total_sectors) {
2507 *pnum = 0;
2508 return 0;
2509 }
2510
2511 n = bs->total_sectors - sector_num;
2512 if (n < nb_sectors) {
2513 nb_sectors = n;
2514 }
2515
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002516 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002517 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002518 return 1;
2519 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002520
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002521 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2522}
2523
2524/* Coroutine wrapper for bdrv_is_allocated() */
2525static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2526{
2527 BdrvCoIsAllocatedData *data = opaque;
2528 BlockDriverState *bs = data->bs;
2529
2530 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2531 data->pnum);
2532 data->done = true;
2533}
2534
2535/*
2536 * Synchronous wrapper around bdrv_co_is_allocated().
2537 *
2538 * See bdrv_co_is_allocated() for details.
2539 */
2540int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2541 int *pnum)
2542{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002543 Coroutine *co;
2544 BdrvCoIsAllocatedData data = {
2545 .bs = bs,
2546 .sector_num = sector_num,
2547 .nb_sectors = nb_sectors,
2548 .pnum = pnum,
2549 .done = false,
2550 };
2551
2552 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2553 qemu_coroutine_enter(co, &data);
2554 while (!data.done) {
2555 qemu_aio_wait();
2556 }
2557 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002558}
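
/* Illustrative sketch (not compiled in): mapping which sector ranges of an
 * image are allocated in its top layer using the synchronous wrapper above.
 * Chunking by 65536 sectors is an arbitrary choice made for the example.
 */
#if 0
static void example_dump_allocation(BlockDriverState *bs)
{
    int64_t total = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    int64_t sector_num = 0;

    while (sector_num < total) {
        int64_t remaining = total - sector_num;
        int nb = remaining > 65536 ? 65536 : (int)remaining;
        int pnum;
        int ret = bdrv_is_allocated(bs, sector_num, nb, &pnum);

        if (ret < 0 || pnum == 0) {
            break;
        }
        printf("[%" PRId64 ", %" PRId64 "): %s\n",
               sector_num, sector_num + pnum,
               ret ? "allocated" : "unallocated");
        sector_num += pnum;
    }
}
#endif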
2559
Luiz Capitulinob2023812011-09-21 17:16:47 -03002560BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002561{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002562 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002563 BlockDriverState *bs;
2564
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002565 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002566 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002567
Luiz Capitulinob2023812011-09-21 17:16:47 -03002568 info->value = g_malloc0(sizeof(*info->value));
2569 info->value->device = g_strdup(bs->device_name);
2570 info->value->type = g_strdup("unknown");
2571 info->value->locked = bdrv_dev_is_medium_locked(bs);
2572 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002573
Markus Armbrustere4def802011-09-06 18:58:53 +02002574 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002575 info->value->has_tray_open = true;
2576 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002577 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002578
2579 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002580 info->value->has_io_status = true;
2581 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002582 }
2583
bellard19cb3732006-08-19 11:45:59 +00002584 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002585 info->value->has_inserted = true;
2586 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2587 info->value->inserted->file = g_strdup(bs->filename);
2588 info->value->inserted->ro = bs->read_only;
2589 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2590 info->value->inserted->encrypted = bs->encrypted;
2591 if (bs->backing_file[0]) {
2592 info->value->inserted->has_backing_file = true;
2593 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002594 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002595
2596 if (bs->io_limits_enabled) {
2597 info->value->inserted->bps =
2598 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2599 info->value->inserted->bps_rd =
2600 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2601 info->value->inserted->bps_wr =
2602 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2603 info->value->inserted->iops =
2604 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2605 info->value->inserted->iops_rd =
2606 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2607 info->value->inserted->iops_wr =
2608 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2609 }
bellardb3380822004-03-14 21:38:54 +00002610 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002611
2612 /* XXX: waiting for the qapi to support GSList */
2613 if (!cur_item) {
2614 head = cur_item = info;
2615 } else {
2616 cur_item->next = info;
2617 cur_item = info;
2618 }
bellardb3380822004-03-14 21:38:54 +00002619 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002620
Luiz Capitulinob2023812011-09-21 17:16:47 -03002621 return head;
bellardb3380822004-03-14 21:38:54 +00002622}
thsa36e69d2007-12-02 05:18:19 +00002623
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002624/* Consider exposing this as a full fledged QMP command */
2625static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002626{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002627 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002628
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002629 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002630
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002631 if (bs->device_name[0]) {
2632 s->has_device = true;
2633 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002634 }
2635
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002636 s->stats = g_malloc0(sizeof(*s->stats));
2637 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2638 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2639 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2640 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2641 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2642 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2643 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2644 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2645 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2646
Kevin Wolf294cc352010-04-28 14:34:01 +02002647 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002648 s->has_parent = true;
2649 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002650 }
2651
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002652 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002653}
2654
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002655BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002656{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002657 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002658 BlockDriverState *bs;
2659
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002660 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002661 BlockStatsList *info = g_malloc0(sizeof(*info));
2662 info->value = qmp_query_blockstat(bs, NULL);
2663
2664 /* XXX: waiting for the qapi to support GSList */
2665 if (!cur_item) {
2666 head = cur_item = info;
2667 } else {
2668 cur_item->next = info;
2669 cur_item = info;
2670 }
thsa36e69d2007-12-02 05:18:19 +00002671 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002672
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002673 return head;
thsa36e69d2007-12-02 05:18:19 +00002674}
bellardea2384d2004-08-01 21:59:26 +00002675
aliguori045df332009-03-05 23:00:48 +00002676const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2677{
2678 if (bs->backing_hd && bs->backing_hd->encrypted)
2679 return bs->backing_file;
2680 else if (bs->encrypted)
2681 return bs->filename;
2682 else
2683 return NULL;
2684}
2685
ths5fafdf22007-09-16 21:08:06 +00002686void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002687 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002688{
Kevin Wolf3574c602011-10-26 11:02:11 +02002689 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002690}
2691
ths5fafdf22007-09-16 21:08:06 +00002692int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002693 const uint8_t *buf, int nb_sectors)
2694{
2695 BlockDriver *drv = bs->drv;
2696 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002697 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002698 if (!drv->bdrv_write_compressed)
2699 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002700 if (bdrv_check_request(bs, sector_num, nb_sectors))
2701 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002702
Jan Kiszkac6d22832009-11-30 18:21:20 +01002703 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002704 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2705 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002706
bellardfaea38e2006-08-05 21:31:00 +00002707 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2708}
ths3b46e622007-09-17 08:09:54 +00002709
bellardfaea38e2006-08-05 21:31:00 +00002710int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2711{
2712 BlockDriver *drv = bs->drv;
2713 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002714 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002715 if (!drv->bdrv_get_info)
2716 return -ENOTSUP;
2717 memset(bdi, 0, sizeof(*bdi));
2718 return drv->bdrv_get_info(bs, bdi);
2719}
2720
Christoph Hellwig45566e92009-07-10 23:11:57 +02002721int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2722 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002723{
2724 BlockDriver *drv = bs->drv;
2725 if (!drv)
2726 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002727 if (drv->bdrv_save_vmstate)
2728 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2729 if (bs->file)
2730 return bdrv_save_vmstate(bs->file, buf, pos, size);
2731 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002732}
2733
Christoph Hellwig45566e92009-07-10 23:11:57 +02002734int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2735 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002736{
2737 BlockDriver *drv = bs->drv;
2738 if (!drv)
2739 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002740 if (drv->bdrv_load_vmstate)
2741 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2742 if (bs->file)
2743 return bdrv_load_vmstate(bs->file, buf, pos, size);
2744 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002745}
2746
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002747void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2748{
2749 BlockDriver *drv = bs->drv;
2750
2751 if (!drv || !drv->bdrv_debug_event) {
2752 return;
2753 }
2754
2755 return drv->bdrv_debug_event(bs, event);
2756
2757}
2758
bellardfaea38e2006-08-05 21:31:00 +00002759/**************************************************************/
2760/* handling of snapshots */
2761
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002762int bdrv_can_snapshot(BlockDriverState *bs)
2763{
2764 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002765 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002766 return 0;
2767 }
2768
2769 if (!drv->bdrv_snapshot_create) {
2770 if (bs->file != NULL) {
2771 return bdrv_can_snapshot(bs->file);
2772 }
2773 return 0;
2774 }
2775
2776 return 1;
2777}
2778
Blue Swirl199630b2010-07-25 20:49:34 +00002779int bdrv_is_snapshot(BlockDriverState *bs)
2780{
2781 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2782}
2783
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002784BlockDriverState *bdrv_snapshots(void)
2785{
2786 BlockDriverState *bs;
2787
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002788 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002789 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002790 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002791
2792 bs = NULL;
2793 while ((bs = bdrv_next(bs))) {
2794 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002795 bs_snapshots = bs;
2796 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002797 }
2798 }
2799 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002800}
2801
ths5fafdf22007-09-16 21:08:06 +00002802int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002803 QEMUSnapshotInfo *sn_info)
2804{
2805 BlockDriver *drv = bs->drv;
2806 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002807 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002808 if (drv->bdrv_snapshot_create)
2809 return drv->bdrv_snapshot_create(bs, sn_info);
2810 if (bs->file)
2811 return bdrv_snapshot_create(bs->file, sn_info);
2812 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002813}
2814
ths5fafdf22007-09-16 21:08:06 +00002815int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002816 const char *snapshot_id)
2817{
2818 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002819 int ret, open_ret;
2820
bellardfaea38e2006-08-05 21:31:00 +00002821 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002822 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002823 if (drv->bdrv_snapshot_goto)
2824 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2825
2826 if (bs->file) {
2827 drv->bdrv_close(bs);
2828 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2829 open_ret = drv->bdrv_open(bs, bs->open_flags);
2830 if (open_ret < 0) {
2831 bdrv_delete(bs->file);
2832 bs->drv = NULL;
2833 return open_ret;
2834 }
2835 return ret;
2836 }
2837
2838 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002839}
2840
2841int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2842{
2843 BlockDriver *drv = bs->drv;
2844 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002845 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002846 if (drv->bdrv_snapshot_delete)
2847 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2848 if (bs->file)
2849 return bdrv_snapshot_delete(bs->file, snapshot_id);
2850 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002851}
2852
ths5fafdf22007-09-16 21:08:06 +00002853int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002854 QEMUSnapshotInfo **psn_info)
2855{
2856 BlockDriver *drv = bs->drv;
2857 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002858 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002859 if (drv->bdrv_snapshot_list)
2860 return drv->bdrv_snapshot_list(bs, psn_info);
2861 if (bs->file)
2862 return bdrv_snapshot_list(bs->file, psn_info);
2863 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002864}
2865
edison51ef6722010-09-21 19:58:41 -07002866int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2867 const char *snapshot_name)
2868{
2869 BlockDriver *drv = bs->drv;
2870 if (!drv) {
2871 return -ENOMEDIUM;
2872 }
2873 if (!bs->read_only) {
2874 return -EINVAL;
2875 }
2876 if (drv->bdrv_snapshot_load_tmp) {
2877 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2878 }
2879 return -ENOTSUP;
2880}
2881
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00002882BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2883 const char *backing_file)
2884{
2885 if (!bs->drv) {
2886 return NULL;
2887 }
2888
2889 if (bs->backing_hd) {
2890 if (strcmp(bs->backing_file, backing_file) == 0) {
2891 return bs->backing_hd;
2892 } else {
2893 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2894 }
2895 }
2896
2897 return NULL;
2898}
2899
bellardfaea38e2006-08-05 21:31:00 +00002900#define NB_SUFFIXES 4
2901
2902char *get_human_readable_size(char *buf, int buf_size, int64_t size)
2903{
2904 static const char suffixes[NB_SUFFIXES] = "KMGT";
2905 int64_t base;
2906 int i;
2907
2908 if (size <= 999) {
2909 snprintf(buf, buf_size, "%" PRId64, size);
2910 } else {
2911 base = 1024;
2912 for(i = 0; i < NB_SUFFIXES; i++) {
2913 if (size < (10 * base)) {
ths5fafdf22007-09-16 21:08:06 +00002914 snprintf(buf, buf_size, "%0.1f%c",
bellardfaea38e2006-08-05 21:31:00 +00002915 (double)size / base,
2916 suffixes[i]);
2917 break;
2918 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
ths5fafdf22007-09-16 21:08:06 +00002919 snprintf(buf, buf_size, "%" PRId64 "%c",
bellardfaea38e2006-08-05 21:31:00 +00002920 ((size + (base >> 1)) / base),
2921 suffixes[i]);
2922 break;
2923 }
2924 base = base * 1024;
2925 }
2926 }
2927 return buf;
2928}
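
/*
 * Illustrative sketch (not part of block.c): a caller formatting a byte
 * count with get_human_readable_size() above. With the logic as written,
 * 1536 prints as "1.5K" and 1073741824 as "1.0G". The helper name is
 * hypothetical.
 */
static void example_print_human_size(int64_t bytes)
{
    char buf[16];

    printf("%s\n", get_human_readable_size(buf, sizeof(buf), bytes));
}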
2929
2930char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2931{
2932 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002933#ifdef _WIN32
2934 struct tm *ptm;
2935#else
bellardfaea38e2006-08-05 21:31:00 +00002936 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002937#endif
bellardfaea38e2006-08-05 21:31:00 +00002938 time_t ti;
2939 int64_t secs;
2940
2941 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002942 snprintf(buf, buf_size,
2943 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002944 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2945 } else {
2946 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002947#ifdef _WIN32
2948 ptm = localtime(&ti);
2949 strftime(date_buf, sizeof(date_buf),
2950 "%Y-%m-%d %H:%M:%S", ptm);
2951#else
bellardfaea38e2006-08-05 21:31:00 +00002952 localtime_r(&ti, &tm);
2953 strftime(date_buf, sizeof(date_buf),
2954 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002955#endif
bellardfaea38e2006-08-05 21:31:00 +00002956 secs = sn->vm_clock_nsec / 1000000000;
2957 snprintf(clock_buf, sizeof(clock_buf),
2958 "%02d:%02d:%02d.%03d",
2959 (int)(secs / 3600),
2960 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002961 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002962 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2963 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002964 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002965 sn->id_str, sn->name,
2966 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2967 date_buf,
2968 clock_buf);
2969 }
2970 return buf;
2971}
2972
bellard83f64092006-08-01 16:21:11 +00002973/**************************************************************/
2974/* async I/Os */
2975
aliguori3b69e4b2009-01-22 16:59:24 +00002976BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002977 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002978 BlockDriverCompletionFunc *cb, void *opaque)
2979{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002980 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2981
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002982 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002983 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002984}
2985
aliguorif141eaf2009-04-07 18:43:24 +00002986BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2987 QEMUIOVector *qiov, int nb_sectors,
2988 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002989{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002990 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2991
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002992 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002993 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002994}
2995
Kevin Wolf40b4f532009-09-09 17:53:37 +02002996
2997typedef struct MultiwriteCB {
2998 int error;
2999 int num_requests;
3000 int num_callbacks;
3001 struct {
3002 BlockDriverCompletionFunc *cb;
3003 void *opaque;
3004 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003005 } callbacks[];
3006} MultiwriteCB;
3007
3008static void multiwrite_user_cb(MultiwriteCB *mcb)
3009{
3010 int i;
3011
3012 for (i = 0; i < mcb->num_callbacks; i++) {
3013 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01003014 if (mcb->callbacks[i].free_qiov) {
3015 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
3016 }
Anthony Liguori7267c092011-08-20 22:09:37 -05003017 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003018 }
3019}
3020
3021static void multiwrite_cb(void *opaque, int ret)
3022{
3023 MultiwriteCB *mcb = opaque;
3024
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003025 trace_multiwrite_cb(mcb, ret);
3026
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02003027 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02003028 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003029 }
3030
3031 mcb->num_requests--;
3032 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02003033 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05003034 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003035 }
3036}
3037
3038static int multiwrite_req_compare(const void *a, const void *b)
3039{
Christoph Hellwig77be4362010-05-19 20:53:10 +02003040 const BlockRequest *req1 = a, *req2 = b;
3041
3042 /*
3043 * Note that we can't simply subtract req2->sector from req1->sector
3044 * here as that could overflow the return value.
3045 */
3046 if (req1->sector > req2->sector) {
3047 return 1;
3048 } else if (req1->sector < req2->sector) {
3049 return -1;
3050 } else {
3051 return 0;
3052 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02003053}
3054
3055/*
3056 * Takes a bunch of requests and tries to merge them. Returns the number of
3057 * requests that remain after merging.
3058 */
3059static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
3060 int num_reqs, MultiwriteCB *mcb)
3061{
3062 int i, outidx;
3063
3064 // Sort requests by start sector
3065 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
3066
3067    // Check if adjacent requests are exactly sequential or overlapping.
3068    // If so, combine them into a single request.
3069 outidx = 0;
3070 for (i = 1; i < num_reqs; i++) {
3071 int merge = 0;
3072 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
3073
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003074 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02003075 if (reqs[i].sector <= oldreq_last) {
3076 merge = 1;
3077 }
3078
Christoph Hellwige2a305f2010-01-26 14:49:08 +01003079 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3080 merge = 0;
3081 }
3082
Kevin Wolf40b4f532009-09-09 17:53:37 +02003083 if (merge) {
3084 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05003085 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003086 qemu_iovec_init(qiov,
3087 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
3088
3089 // Add the first request to the merged one. If the requests are
3090 // overlapping, drop the last sectors of the first request.
3091 size = (reqs[i].sector - reqs[outidx].sector) << 9;
3092 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
3093
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003094            // We should not need to add any zeros between the two requests
3095 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003096
3097 // Add the second request
3098 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
3099
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02003100 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003101 reqs[outidx].qiov = qiov;
3102
3103 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3104 } else {
3105 outidx++;
3106 reqs[outidx].sector = reqs[i].sector;
3107 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3108 reqs[outidx].qiov = reqs[i].qiov;
3109 }
3110 }
3111
3112 return outidx + 1;
3113}
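
/*
 * Worked example (illustrative): given three write requests covering
 * sectors 0..7, 8..15 and 100..107, the first two are exactly sequential
 * (reqs[1].sector == oldreq_last), so multiwrite_merge() concatenates their
 * qiovs into a single 16-sector request; the third stays separate and the
 * function returns 2.
 */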
3114
3115/*
3116 * Submit multiple AIO write requests at once.
3117 *
3118 * On success, the function returns 0 and all requests in the reqs array have
3119 * been submitted. In the error case this function returns -1 and any of the
3120 * requests may or may not have been submitted yet. In particular, this means
3121 * that the callback will be called for some of the requests and not for others.
3122 * The caller must check the error field of each BlockRequest to find out which
3123 * callbacks to wait for (if error != 0, no callback will be called).
3124 *
3125 * The implementation may modify the contents of the reqs array, e.g. to merge
3126 * requests. However, the fields opaque and error are left unmodified as they
3127 * are used to signal failure for a single request to the caller.
3128 */
3129int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3130{
Kevin Wolf40b4f532009-09-09 17:53:37 +02003131 MultiwriteCB *mcb;
3132 int i;
3133
Ryan Harper301db7c2011-03-07 10:01:04 -06003134 /* don't submit writes if we don't have a medium */
3135 if (bs->drv == NULL) {
3136 for (i = 0; i < num_reqs; i++) {
3137 reqs[i].error = -ENOMEDIUM;
3138 }
3139 return -1;
3140 }
3141
Kevin Wolf40b4f532009-09-09 17:53:37 +02003142 if (num_reqs == 0) {
3143 return 0;
3144 }
3145
3146 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05003147 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003148 mcb->num_requests = 0;
3149 mcb->num_callbacks = num_reqs;
3150
3151 for (i = 0; i < num_reqs; i++) {
3152 mcb->callbacks[i].cb = reqs[i].cb;
3153 mcb->callbacks[i].opaque = reqs[i].opaque;
3154 }
3155
3156    // Check for mergeable requests
3157 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3158
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003159 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3160
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01003161 /* Run the aio requests. */
3162 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003163 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01003164 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02003165 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003166 }
3167
3168 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003169}
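
/*
 * Illustrative sketch (not part of block.c): how a device model might batch
 * two writes through bdrv_aio_multiwrite(). The BlockRequest fields used
 * here (sector, nb_sectors, qiov, cb, opaque, error) mirror the usage
 * above; the function and callback names are hypothetical.
 */
static void example_multiwrite_cb(void *opaque, int ret)
{
    /* Called once per original request; ret < 0 reports its failure. */
}

static int example_submit_two_writes(BlockDriverState *bs,
                                     QEMUIOVector *qiov0, QEMUIOVector *qiov1)
{
    BlockRequest reqs[2] = {
        { .sector = 0,  .nb_sectors = qiov0->size >> 9, .qiov = qiov0,
          .cb = example_multiwrite_cb, .opaque = NULL },
        { .sector = 16, .nb_sectors = qiov1->size >> 9, .qiov = qiov1,
          .cb = example_multiwrite_cb, .opaque = NULL },
    };

    /* On -1, reqs[i].error tells the caller which callbacks will not run. */
    return bdrv_aio_multiwrite(bs, reqs, 2);
}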
3170
bellard83f64092006-08-01 16:21:11 +00003171void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00003172{
aliguori6bbff9a2009-03-20 18:25:59 +00003173 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00003174}
3175
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08003176/* block I/O throttling */
3177static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3178 bool is_write, double elapsed_time, uint64_t *wait)
3179{
3180 uint64_t bps_limit = 0;
3181 double bytes_limit, bytes_base, bytes_res;
3182 double slice_time, wait_time;
3183
3184 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3185 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3186 } else if (bs->io_limits.bps[is_write]) {
3187 bps_limit = bs->io_limits.bps[is_write];
3188 } else {
3189 if (wait) {
3190 *wait = 0;
3191 }
3192
3193 return false;
3194 }
3195
3196 slice_time = bs->slice_end - bs->slice_start;
3197 slice_time /= (NANOSECONDS_PER_SECOND);
3198 bytes_limit = bps_limit * slice_time;
3199 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3200 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3201 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3202 }
3203
3204    /* bytes_base: the bytes of data which have already been read/written;
3205     * it is obtained from the accumulated I/O statistics.
3206 * bytes_res: the remaining bytes of data which need to be read/written.
3207     * (bytes_base + bytes_res) / bps_limit: used to calculate
3208     * the total time for completing reading/writing all data.
3209 */
3210 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3211
3212 if (bytes_base + bytes_res <= bytes_limit) {
3213 if (wait) {
3214 *wait = 0;
3215 }
3216
3217 return false;
3218 }
3219
3220 /* Calc approx time to dispatch */
3221 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3222
3223 /* When the I/O rate at runtime exceeds the limits,
3224     * bs->slice_end needs to be extended so that the current statistics
3225     * can be kept until the timer fires; the value is increased and tuned
3226     * based on experimental results.
3227 */
3228 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3229 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3230 if (wait) {
3231 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3232 }
3233
3234 return true;
3235}
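
/*
 * Worked example (illustrative numbers): with bps_limit = 1048576 (1 MiB/s)
 * and a 0.1 s slice, bytes_limit is about 104858. If bytes_base = 90000
 * bytes are already accounted to the slice and a 32768-byte request
 * arrives, bytes_base + bytes_res = 122768 exceeds the limit, so the
 * request is delayed by roughly 122768 / 1048576 - elapsed_time seconds
 * (about 17 ms when elapsed_time is 0.1 s).
 */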
3236
3237static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3238 double elapsed_time, uint64_t *wait)
3239{
3240 uint64_t iops_limit = 0;
3241 double ios_limit, ios_base;
3242 double slice_time, wait_time;
3243
3244 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3245 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3246 } else if (bs->io_limits.iops[is_write]) {
3247 iops_limit = bs->io_limits.iops[is_write];
3248 } else {
3249 if (wait) {
3250 *wait = 0;
3251 }
3252
3253 return false;
3254 }
3255
3256 slice_time = bs->slice_end - bs->slice_start;
3257 slice_time /= (NANOSECONDS_PER_SECOND);
3258 ios_limit = iops_limit * slice_time;
3259 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3260 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3261 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3262 }
3263
3264 if (ios_base + 1 <= ios_limit) {
3265 if (wait) {
3266 *wait = 0;
3267 }
3268
3269 return false;
3270 }
3271
3272 /* Calc approx time to dispatch */
3273 wait_time = (ios_base + 1) / iops_limit;
3274 if (wait_time > elapsed_time) {
3275 wait_time = wait_time - elapsed_time;
3276 } else {
3277 wait_time = 0;
3278 }
3279
3280 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3281 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3282 if (wait) {
3283 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3284 }
3285
3286 return true;
3287}
3288
3289static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3290 bool is_write, int64_t *wait)
3291{
3292 int64_t now, max_wait;
3293 uint64_t bps_wait = 0, iops_wait = 0;
3294 double elapsed_time;
3295 int bps_ret, iops_ret;
3296
3297 now = qemu_get_clock_ns(vm_clock);
3298 if ((bs->slice_start < now)
3299 && (bs->slice_end > now)) {
3300 bs->slice_end = now + bs->slice_time;
3301 } else {
3302 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3303 bs->slice_start = now;
3304 bs->slice_end = now + bs->slice_time;
3305
3306 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3307 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3308
3309 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3310 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3311 }
3312
3313 elapsed_time = now - bs->slice_start;
3314 elapsed_time /= (NANOSECONDS_PER_SECOND);
3315
3316 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3317 is_write, elapsed_time, &bps_wait);
3318 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3319 elapsed_time, &iops_wait);
3320 if (bps_ret || iops_ret) {
3321 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3322 if (wait) {
3323 *wait = max_wait;
3324 }
3325
3326 now = qemu_get_clock_ns(vm_clock);
3327 if (bs->slice_end < now + max_wait) {
3328 bs->slice_end = now + max_wait;
3329 }
3330
3331 return true;
3332 }
3333
3334 if (wait) {
3335 *wait = 0;
3336 }
3337
3338 return false;
3339}
pbrookce1a14d2006-08-07 02:38:06 +00003340
bellard83f64092006-08-01 16:21:11 +00003341/**************************************************************/
3342/* async block device emulation */
3343
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003344typedef struct BlockDriverAIOCBSync {
3345 BlockDriverAIOCB common;
3346 QEMUBH *bh;
3347 int ret;
3348 /* vector translation state */
3349 QEMUIOVector *qiov;
3350 uint8_t *bounce;
3351 int is_write;
3352} BlockDriverAIOCBSync;
3353
3354static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3355{
Kevin Wolfb666d232010-05-05 11:44:39 +02003356 BlockDriverAIOCBSync *acb =
3357 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003358 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003359 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003360 qemu_aio_release(acb);
3361}
3362
3363static AIOPool bdrv_em_aio_pool = {
3364 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3365 .cancel = bdrv_aio_cancel_em,
3366};
3367
bellard83f64092006-08-01 16:21:11 +00003368static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003369{
pbrookce1a14d2006-08-07 02:38:06 +00003370 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003371
aliguorif141eaf2009-04-07 18:43:24 +00003372 if (!acb->is_write)
3373 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003374 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003375 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003376 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003377 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003378 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003379}
bellardbeac80c2006-06-26 20:08:57 +00003380
aliguorif141eaf2009-04-07 18:43:24 +00003381static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3382 int64_t sector_num,
3383 QEMUIOVector *qiov,
3384 int nb_sectors,
3385 BlockDriverCompletionFunc *cb,
3386 void *opaque,
3387 int is_write)
3388
bellardea2384d2004-08-01 21:59:26 +00003389{
pbrookce1a14d2006-08-07 02:38:06 +00003390 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003391
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003392 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003393 acb->is_write = is_write;
3394 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003395 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003396 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003397
3398 if (is_write) {
3399 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003400 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003401 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003402 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003403 }
3404
pbrookce1a14d2006-08-07 02:38:06 +00003405 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003406
pbrookce1a14d2006-08-07 02:38:06 +00003407 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003408}
3409
aliguorif141eaf2009-04-07 18:43:24 +00003410static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3411 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003412 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003413{
aliguorif141eaf2009-04-07 18:43:24 +00003414 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003415}
3416
aliguorif141eaf2009-04-07 18:43:24 +00003417static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3418 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3419 BlockDriverCompletionFunc *cb, void *opaque)
3420{
3421 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3422}
3423
Kevin Wolf68485422011-06-30 10:05:46 +02003424
3425typedef struct BlockDriverAIOCBCoroutine {
3426 BlockDriverAIOCB common;
3427 BlockRequest req;
3428 bool is_write;
3429 QEMUBH* bh;
3430} BlockDriverAIOCBCoroutine;
3431
3432static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3433{
3434 qemu_aio_flush();
3435}
3436
3437static AIOPool bdrv_em_co_aio_pool = {
3438 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3439 .cancel = bdrv_aio_co_cancel_em,
3440};
3441
Paolo Bonzini35246a62011-10-14 10:41:29 +02003442static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003443{
3444 BlockDriverAIOCBCoroutine *acb = opaque;
3445
3446 acb->common.cb(acb->common.opaque, acb->req.error);
3447 qemu_bh_delete(acb->bh);
3448 qemu_aio_release(acb);
3449}
3450
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003451/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3452static void coroutine_fn bdrv_co_do_rw(void *opaque)
3453{
3454 BlockDriverAIOCBCoroutine *acb = opaque;
3455 BlockDriverState *bs = acb->common.bs;
3456
3457 if (!acb->is_write) {
3458 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003459 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003460 } else {
3461 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003462 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003463 }
3464
Paolo Bonzini35246a62011-10-14 10:41:29 +02003465 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003466 qemu_bh_schedule(acb->bh);
3467}
3468
Kevin Wolf68485422011-06-30 10:05:46 +02003469static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3470 int64_t sector_num,
3471 QEMUIOVector *qiov,
3472 int nb_sectors,
3473 BlockDriverCompletionFunc *cb,
3474 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003475 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003476{
3477 Coroutine *co;
3478 BlockDriverAIOCBCoroutine *acb;
3479
3480 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3481 acb->req.sector = sector_num;
3482 acb->req.nb_sectors = nb_sectors;
3483 acb->req.qiov = qiov;
3484 acb->is_write = is_write;
3485
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003486 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003487 qemu_coroutine_enter(co, acb);
3488
3489 return &acb->common;
3490}
3491
Paolo Bonzini07f07612011-10-17 12:32:12 +02003492static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003493{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003494 BlockDriverAIOCBCoroutine *acb = opaque;
3495 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003496
Paolo Bonzini07f07612011-10-17 12:32:12 +02003497 acb->req.error = bdrv_co_flush(bs);
3498 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003499 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003500}
3501
Paolo Bonzini07f07612011-10-17 12:32:12 +02003502BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003503 BlockDriverCompletionFunc *cb, void *opaque)
3504{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003505 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003506
Paolo Bonzini07f07612011-10-17 12:32:12 +02003507 Coroutine *co;
3508 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003509
Paolo Bonzini07f07612011-10-17 12:32:12 +02003510 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3511 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3512 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003513
Alexander Graf016f5cf2010-05-26 17:51:49 +02003514 return &acb->common;
3515}
3516
Paolo Bonzini4265d622011-10-17 12:32:14 +02003517static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3518{
3519 BlockDriverAIOCBCoroutine *acb = opaque;
3520 BlockDriverState *bs = acb->common.bs;
3521
3522 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3523 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3524 qemu_bh_schedule(acb->bh);
3525}
3526
3527BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3528 int64_t sector_num, int nb_sectors,
3529 BlockDriverCompletionFunc *cb, void *opaque)
3530{
3531 Coroutine *co;
3532 BlockDriverAIOCBCoroutine *acb;
3533
3534 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3535
3536 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3537 acb->req.sector = sector_num;
3538 acb->req.nb_sectors = nb_sectors;
3539 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3540 qemu_coroutine_enter(co, acb);
3541
3542 return &acb->common;
3543}
3544
bellardea2384d2004-08-01 21:59:26 +00003545void bdrv_init(void)
3546{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003547 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003548}
pbrookce1a14d2006-08-07 02:38:06 +00003549
Markus Armbrustereb852012009-10-27 18:41:44 +01003550void bdrv_init_with_whitelist(void)
3551{
3552 use_bdrv_whitelist = 1;
3553 bdrv_init();
3554}
3555
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003556void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3557 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003558{
pbrookce1a14d2006-08-07 02:38:06 +00003559 BlockDriverAIOCB *acb;
3560
aliguori6bbff9a2009-03-20 18:25:59 +00003561 if (pool->free_aiocb) {
3562 acb = pool->free_aiocb;
3563 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003564 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003565 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003566 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003567 }
3568 acb->bs = bs;
3569 acb->cb = cb;
3570 acb->opaque = opaque;
3571 return acb;
3572}
3573
3574void qemu_aio_release(void *p)
3575{
aliguori6bbff9a2009-03-20 18:25:59 +00003576 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3577 AIOPool *pool = acb->pool;
3578 acb->next = pool->free_aiocb;
3579 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003580}
bellard19cb3732006-08-19 11:45:59 +00003581
3582/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003583/* Coroutine block device emulation */
3584
3585typedef struct CoroutineIOCompletion {
3586 Coroutine *coroutine;
3587 int ret;
3588} CoroutineIOCompletion;
3589
3590static void bdrv_co_io_em_complete(void *opaque, int ret)
3591{
3592 CoroutineIOCompletion *co = opaque;
3593
3594 co->ret = ret;
3595 qemu_coroutine_enter(co->coroutine, NULL);
3596}
3597
3598static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3599 int nb_sectors, QEMUIOVector *iov,
3600 bool is_write)
3601{
3602 CoroutineIOCompletion co = {
3603 .coroutine = qemu_coroutine_self(),
3604 };
3605 BlockDriverAIOCB *acb;
3606
3607 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003608 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3609 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003610 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003611 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3612 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003613 }
3614
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003615 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003616 if (!acb) {
3617 return -EIO;
3618 }
3619 qemu_coroutine_yield();
3620
3621 return co.ret;
3622}
3623
3624static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3625 int64_t sector_num, int nb_sectors,
3626 QEMUIOVector *iov)
3627{
3628 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3629}
3630
3631static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3632 int64_t sector_num, int nb_sectors,
3633 QEMUIOVector *iov)
3634{
3635 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3636}
3637
Paolo Bonzini07f07612011-10-17 12:32:12 +02003638static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003639{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003640 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003641
Paolo Bonzini07f07612011-10-17 12:32:12 +02003642 rwco->ret = bdrv_co_flush(rwco->bs);
3643}
3644
3645int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3646{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003647 int ret;
3648
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003649 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003650 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003651 }
3652
Kevin Wolfca716362011-11-10 18:13:59 +01003653 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003654 if (bs->drv->bdrv_co_flush_to_os) {
3655 ret = bs->drv->bdrv_co_flush_to_os(bs);
3656 if (ret < 0) {
3657 return ret;
3658 }
3659 }
3660
Kevin Wolfca716362011-11-10 18:13:59 +01003661 /* But don't actually force it to the disk with cache=unsafe */
3662 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3663 return 0;
3664 }
3665
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003666 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003667 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003668 } else if (bs->drv->bdrv_aio_flush) {
3669 BlockDriverAIOCB *acb;
3670 CoroutineIOCompletion co = {
3671 .coroutine = qemu_coroutine_self(),
3672 };
3673
3674 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3675 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003676 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003677 } else {
3678 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003679 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003680 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003681 } else {
3682 /*
3683 * Some block drivers always operate in either writethrough or unsafe
3684         * mode and therefore don't support bdrv_flush. Usually qemu doesn't
3685 * know how the server works (because the behaviour is hardcoded or
3686 * depends on server-side configuration), so we can't ensure that
3687 * everything is safe on disk. Returning an error doesn't work because
3688 * that would break guests even if the server operates in writethrough
3689 * mode.
3690 *
3691 * Let's hope the user knows what he's doing.
3692 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003693 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003694 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003695 if (ret < 0) {
3696 return ret;
3697 }
3698
3699 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3700     * set in the case of cache=unsafe, so there are no useless flushes.
3701 */
3702 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003703}
3704
Anthony Liguori0f154232011-11-14 15:09:45 -06003705void bdrv_invalidate_cache(BlockDriverState *bs)
3706{
3707 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3708 bs->drv->bdrv_invalidate_cache(bs);
3709 }
3710}
3711
3712void bdrv_invalidate_cache_all(void)
3713{
3714 BlockDriverState *bs;
3715
3716 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3717 bdrv_invalidate_cache(bs);
3718 }
3719}
3720
Benoît Canet07789262012-03-23 08:36:49 +01003721void bdrv_clear_incoming_migration_all(void)
3722{
3723 BlockDriverState *bs;
3724
3725 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3726 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
3727 }
3728}
3729
Paolo Bonzini07f07612011-10-17 12:32:12 +02003730int bdrv_flush(BlockDriverState *bs)
3731{
3732 Coroutine *co;
3733 RwCo rwco = {
3734 .bs = bs,
3735 .ret = NOT_DONE,
3736 };
3737
3738 if (qemu_in_coroutine()) {
3739 /* Fast-path if already in coroutine context */
3740 bdrv_flush_co_entry(&rwco);
3741 } else {
3742 co = qemu_coroutine_create(bdrv_flush_co_entry);
3743 qemu_coroutine_enter(co, &rwco);
3744 while (rwco.ret == NOT_DONE) {
3745 qemu_aio_wait();
3746 }
3747 }
3748
3749 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003750}
3751
Paolo Bonzini4265d622011-10-17 12:32:14 +02003752static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3753{
3754 RwCo *rwco = opaque;
3755
3756 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3757}
3758
3759int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3760 int nb_sectors)
3761{
3762 if (!bs->drv) {
3763 return -ENOMEDIUM;
3764 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3765 return -EIO;
3766 } else if (bs->read_only) {
3767 return -EROFS;
3768 } else if (bs->drv->bdrv_co_discard) {
3769 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3770 } else if (bs->drv->bdrv_aio_discard) {
3771 BlockDriverAIOCB *acb;
3772 CoroutineIOCompletion co = {
3773 .coroutine = qemu_coroutine_self(),
3774 };
3775
3776 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3777 bdrv_co_io_em_complete, &co);
3778 if (acb == NULL) {
3779 return -EIO;
3780 } else {
3781 qemu_coroutine_yield();
3782 return co.ret;
3783 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003784 } else {
3785 return 0;
3786 }
3787}
3788
3789int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3790{
3791 Coroutine *co;
3792 RwCo rwco = {
3793 .bs = bs,
3794 .sector_num = sector_num,
3795 .nb_sectors = nb_sectors,
3796 .ret = NOT_DONE,
3797 };
3798
3799 if (qemu_in_coroutine()) {
3800 /* Fast-path if already in coroutine context */
3801 bdrv_discard_co_entry(&rwco);
3802 } else {
3803 co = qemu_coroutine_create(bdrv_discard_co_entry);
3804 qemu_coroutine_enter(co, &rwco);
3805 while (rwco.ret == NOT_DONE) {
3806 qemu_aio_wait();
3807 }
3808 }
3809
3810 return rwco.ret;
3811}
3812
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003813/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003814/* removable device support */
3815
3816/**
3817 * Return TRUE if the media is present
3818 */
3819int bdrv_is_inserted(BlockDriverState *bs)
3820{
3821 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003822
bellard19cb3732006-08-19 11:45:59 +00003823 if (!drv)
3824 return 0;
3825 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003826 return 1;
3827 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003828}
3829
3830/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003831 * Return whether the media changed since the last call to this
3832 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003833 */
3834int bdrv_media_changed(BlockDriverState *bs)
3835{
3836 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003837
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003838 if (drv && drv->bdrv_media_changed) {
3839 return drv->bdrv_media_changed(bs);
3840 }
3841 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003842}
3843
3844/**
3845 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3846 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02003847void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003848{
3849 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003850
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003851 if (drv && drv->bdrv_eject) {
3852 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003853 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02003854
3855 if (bs->device_name[0] != '\0') {
3856 bdrv_emit_qmp_eject_event(bs, eject_flag);
3857 }
bellard19cb3732006-08-19 11:45:59 +00003858}
3859
bellard19cb3732006-08-19 11:45:59 +00003860/**
3861 * Lock or unlock the media (if it is locked, the user won't be able
3862 * to eject it manually).
3863 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003864void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003865{
3866 BlockDriver *drv = bs->drv;
3867
Markus Armbruster025e8492011-09-06 18:58:47 +02003868 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003869
Markus Armbruster025e8492011-09-06 18:58:47 +02003870 if (drv && drv->bdrv_lock_medium) {
3871 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003872 }
3873}
ths985a03b2007-12-24 16:10:43 +00003874
3875/* needed for generic scsi interface */
3876
3877int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3878{
3879 BlockDriver *drv = bs->drv;
3880
3881 if (drv && drv->bdrv_ioctl)
3882 return drv->bdrv_ioctl(bs, req, buf);
3883 return -ENOTSUP;
3884}
aliguori7d780662009-03-12 19:57:08 +00003885
aliguori221f7152009-03-28 17:28:41 +00003886BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3887 unsigned long int req, void *buf,
3888 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003889{
aliguori221f7152009-03-28 17:28:41 +00003890 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003891
aliguori221f7152009-03-28 17:28:41 +00003892 if (drv && drv->bdrv_aio_ioctl)
3893 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3894 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003895}
aliguorie268ca52009-04-22 20:20:00 +00003896
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003897void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3898{
3899 bs->buffer_alignment = align;
3900}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003901
aliguorie268ca52009-04-22 20:20:00 +00003902void *qemu_blockalign(BlockDriverState *bs, size_t size)
3903{
3904 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3905}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003906
3907void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3908{
3909 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003910
Liran Schouraaa0eb72010-01-26 10:31:48 +02003911 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003912 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003913 if (!bs->dirty_bitmap) {
3914 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
Paolo Bonzini71df14f2012-04-12 14:01:04 +02003915 BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
3916 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003917
Paolo Bonzini71df14f2012-04-12 14:01:04 +02003918 bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003919 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003920 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003921 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003922 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003923 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003924 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003925 }
3926}
3927
3928int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3929{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003930 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003931
Jan Kiszkac6d22832009-11-30 18:21:20 +01003932 if (bs->dirty_bitmap &&
3933 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003934 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3935 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003936 } else {
3937 return 0;
3938 }
3939}
3940
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003941void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3942 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003943{
3944 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3945}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003946
3947int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3948{
3949 return bs->dirty_count;
3950}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003951
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003952void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3953{
3954 assert(bs->in_use != in_use);
3955 bs->in_use = in_use;
3956}
3957
3958int bdrv_in_use(BlockDriverState *bs)
3959{
3960 return bs->in_use;
3961}
3962
Luiz Capitulino28a72822011-09-26 17:43:50 -03003963void bdrv_iostatus_enable(BlockDriverState *bs)
3964{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003965 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003966 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003967}
3968
3969/* The I/O status is only enabled if the drive explicitly
3970 * enables it _and_ the VM is configured to stop on errors */
3971bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3972{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003973 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003974 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3975 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3976 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3977}
3978
3979void bdrv_iostatus_disable(BlockDriverState *bs)
3980{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003981 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003982}
3983
3984void bdrv_iostatus_reset(BlockDriverState *bs)
3985{
3986 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003987 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003988 }
3989}
3990
3991/* XXX: Today this is set by device models because it makes the implementation
3992 quite simple. However, the block layer knows about the error, so it's
3993 possible to implement this without device models being involved */
3994void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3995{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003996 if (bdrv_iostatus_is_enabled(bs) &&
3997 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003998 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003999 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
4000 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03004001 }
4002}
4003
Christoph Hellwiga597e792011-08-25 08:26:01 +02004004void
4005bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
4006 enum BlockAcctType type)
4007{
4008 assert(type < BDRV_MAX_IOTYPE);
4009
4010 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02004011 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02004012 cookie->type = type;
4013}
4014
4015void
4016bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
4017{
4018 assert(cookie->type < BDRV_MAX_IOTYPE);
4019
4020 bs->nr_bytes[cookie->type] += cookie->bytes;
4021 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02004022 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02004023}
4024
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004025int bdrv_img_create(const char *filename, const char *fmt,
4026 const char *base_filename, const char *base_fmt,
4027 char *options, uint64_t img_size, int flags)
4028{
4029 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02004030 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004031 BlockDriverState *bs = NULL;
4032 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004033 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004034 int ret = 0;
4035
4036 /* Find driver and parse its options */
4037 drv = bdrv_find_format(fmt);
4038 if (!drv) {
4039 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004040 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004041 goto out;
4042 }
4043
4044 proto_drv = bdrv_find_protocol(filename);
4045 if (!proto_drv) {
4046 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004047 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004048 goto out;
4049 }
4050
4051 create_options = append_option_parameters(create_options,
4052 drv->create_options);
4053 create_options = append_option_parameters(create_options,
4054 proto_drv->create_options);
4055
4056 /* Create parameter list with default values */
4057 param = parse_option_parameters("", create_options, param);
4058
4059 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4060
4061 /* Parse -o options */
4062 if (options) {
4063 param = parse_option_parameters(options, create_options, param);
4064 if (param == NULL) {
4065 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004066 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004067 goto out;
4068 }
4069 }
4070
4071 if (base_filename) {
4072 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4073 base_filename)) {
4074 error_report("Backing file not supported for file format '%s'",
4075 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004076 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004077 goto out;
4078 }
4079 }
4080
4081 if (base_fmt) {
4082 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4083 error_report("Backing file format not supported for file "
4084 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004085 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004086 goto out;
4087 }
4088 }
4089
Jes Sorensen792da932010-12-16 13:52:17 +01004090 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4091 if (backing_file && backing_file->value.s) {
4092 if (!strcmp(filename, backing_file->value.s)) {
4093 error_report("Error: Trying to create an image with the "
4094 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004095 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01004096 goto out;
4097 }
4098 }
4099
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004100 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4101 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004102 backing_drv = bdrv_find_format(backing_fmt->value.s);
4103 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004104 error_report("Unknown backing file format '%s'",
4105 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004106 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004107 goto out;
4108 }
4109 }
4110
4111 // The size for the image must always be specified, with one exception:
4112 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02004113 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4114 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004115 if (backing_file && backing_file->value.s) {
4116 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004117 char buf[32];
Paolo Bonzini63090da2012-04-12 14:01:03 +02004118 int back_flags;
4119
4120 /* backing files always opened read-only */
4121 back_flags =
4122 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004123
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004124 bs = bdrv_new("");
4125
Paolo Bonzini63090da2012-04-12 14:01:03 +02004126 ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004127 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004128 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004129 goto out;
4130 }
4131 bdrv_get_geometry(bs, &size);
4132 size *= 512;
4133
4134 snprintf(buf, sizeof(buf), "%" PRId64, size);
4135 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4136 } else {
4137 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004138 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004139 goto out;
4140 }
4141 }
4142
4143 printf("Formatting '%s', fmt=%s ", filename, fmt);
4144 print_option_parameters(param);
4145 puts("");
4146
4147 ret = bdrv_create(drv, filename, param);
4148
4149 if (ret < 0) {
4150 if (ret == -ENOTSUP) {
4151 error_report("Formatting or formatting option not supported for "
4152 "file format '%s'", fmt);
4153 } else if (ret == -EFBIG) {
4154 error_report("The image size is too large for file format '%s'",
4155 fmt);
4156 } else {
4157 error_report("%s: error while creating %s: %s", filename, fmt,
4158 strerror(-ret));
4159 }
4160 }
4161
4162out:
4163 free_option_parameters(create_options);
4164 free_option_parameters(param);
4165
4166 if (bs) {
4167 bdrv_delete(bs);
4168 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01004169
4170 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004171}
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004172
4173void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
Stefan Hajnoczic83c66c2012-04-25 16:51:03 +01004174 int64_t speed, BlockDriverCompletionFunc *cb,
4175 void *opaque, Error **errp)
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004176{
4177 BlockJob *job;
4178
4179 if (bs->job || bdrv_in_use(bs)) {
Stefan Hajnoczifd7f8c62012-04-25 16:51:00 +01004180 error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004181 return NULL;
4182 }
4183 bdrv_set_in_use(bs, 1);
4184
4185 job = g_malloc0(job_type->instance_size);
4186 job->job_type = job_type;
4187 job->bs = bs;
4188 job->cb = cb;
4189 job->opaque = opaque;
4190 bs->job = job;
Stefan Hajnoczic83c66c2012-04-25 16:51:03 +01004191
4192 /* Only set speed when necessary to avoid NotSupported error */
4193 if (speed != 0) {
4194 Error *local_err = NULL;
4195
4196 block_job_set_speed(job, speed, &local_err);
4197 if (error_is_set(&local_err)) {
4198 bs->job = NULL;
4199 g_free(job);
4200 bdrv_set_in_use(bs, 0);
4201 error_propagate(errp, local_err);
4202 return NULL;
4203 }
4204 }
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004205 return job;
4206}
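
/*
 * Illustrative sketch (hypothetical job type, not part of block.c): a
 * concrete job embeds BlockJob as its first member and registers a
 * BlockJobType whose instance_size covers the whole struct, since
 * block_job_create() above g_malloc0()s exactly instance_size bytes and
 * returns that allocation as the job. Other BlockJobType members (such as
 * set_speed) are omitted here.
 */
typedef struct ExampleBlockJob {
    BlockJob common;        /* must be first so the pointers coincide */
    int64_t sectors_done;
} ExampleBlockJob;

static const BlockJobType example_job_type = {
    .instance_size = sizeof(ExampleBlockJob),
};

static ExampleBlockJob *example_job_start(BlockDriverState *bs, Error **errp)
{
    /* speed == 0 skips block_job_set_speed(); cb/opaque may be NULL here */
    return block_job_create(&example_job_type, bs, 0, NULL, NULL, errp);
}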
4207
4208void block_job_complete(BlockJob *job, int ret)
4209{
4210 BlockDriverState *bs = job->bs;
4211
4212 assert(bs->job == job);
4213 job->cb(job->opaque, ret);
4214 bs->job = NULL;
4215 g_free(job);
4216 bdrv_set_in_use(bs, 0);
4217}
4218
Stefan Hajnoczi882ec7c2012-04-25 16:51:02 +01004219void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004220{
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004221 Error *local_err = NULL;
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004222
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004223 if (!job->job_type->set_speed) {
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004224 error_set(errp, QERR_NOT_SUPPORTED);
4225 return;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004226 }
Stefan Hajnoczi882ec7c2012-04-25 16:51:02 +01004227 job->job_type->set_speed(job, speed, &local_err);
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004228 if (error_is_set(&local_err)) {
4229 error_propagate(errp, local_err);
4230 return;
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004231 }
Stefan Hajnoczi9e6636c2012-04-25 16:51:01 +01004232
Stefan Hajnoczi882ec7c2012-04-25 16:51:02 +01004233 job->speed = speed;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004234}
4235
4236void block_job_cancel(BlockJob *job)
4237{
4238 job->cancelled = true;
4239}
4240
4241bool block_job_is_cancelled(BlockJob *job)
4242{
4243 return job->cancelled;
4244}
Paolo Bonzini3e914652012-03-30 13:17:11 +02004245
4246void block_job_cancel_sync(BlockJob *job)
4247{
4248 BlockDriverState *bs = job->bs;
4249
4250 assert(bs->job == job);
4251 block_job_cancel(job);
4252 while (bs->job != NULL && bs->job->busy) {
4253 qemu_aio_wait();
4254 }
4255}