/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "module.h"
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif
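
/* Illustrative note (editor sketch, not part of the original source): with the
 * logic above, a bare two-character drive such as "c:" is a whole drive, a
 * drive prefix followed by a path is not, and device paths are recognized by
 * their prefixes:
 *
 *     is_windows_drive("c:")                    -> 1
 *     is_windows_drive("c:\\image.qcow2")       -> 0  (prefix only)
 *     is_windows_drive("\\\\.\\PhysicalDrive0") -> 1
 */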

/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end   = 0;
    bs->slice_time  = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}

static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}

bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
         || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
         || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
         || io_limits->iops[BLOCK_IO_LIMIT_READ]
         || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
         || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}

static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* Requests are serviced in FIFO order.  The next throttled request is not
     * dequeued until the current request has been allowed to proceed.  If the
     * current request still exceeds the limits, it is re-inserted at the head
     * of the queue, so all requests behind it remain in throttled_reqs.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}
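
/* Illustrative sketch (editor addition, not part of the original source): the
 * throttling helpers above are driven from the request path.  Assuming the
 * limit values in bs->io_limits have already been filled in elsewhere, the
 * expected flow looks roughly like this:
 *
 *     if (bdrv_io_limits_enabled(bs)) {    // any bps/iops limit set?
 *         bdrv_io_limits_enable(bs);       // arm timer, init throttled_reqs
 *     }
 *     ...
 *     // in the read/write path, before issuing the request:
 *     if (bs->io_limits_enabled) {
 *         bdrv_io_limits_intercept(bs, is_write, nb_sectors);
 *     }
 *
 * bdrv_io_limits_intercept() parks the calling coroutine until the current
 * slice allows the request, using bdrv_block_timer() to wake the queue again.
 */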

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
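
/* Worked example (editor addition, not part of the original source): this is
 * how a relative backing file name stored in an image is resolved against the
 * directory of that image in bdrv_open() below:
 *
 *     char dest[PATH_MAX];
 *     path_combine(dest, sizeof(dest), "/vm/disks/overlay.qcow2", "base.img");
 *     // dest == "/vm/disks/base.img"
 *     path_combine(dest, sizeof(dest), "/vm/disks/overlay.qcow2", "/abs/base.img");
 *     // absolute filename wins: dest == "/abs/base.img"
 */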

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;

    return drv->bdrv_create(filename, options);
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
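
/* Illustrative mapping (editor note, not part of the original source): the
 * cache mode strings accepted above translate into open flags as follows --
 * "none"/"off" -> BDRV_O_NOCACHE | BDRV_O_CACHE_WB, "directsync" ->
 * BDRV_O_NOCACHE, "writeback" -> BDRV_O_CACHE_WB, "unsafe" ->
 * BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, and "writethrough" adds nothing (the
 * default).  A caller would use it roughly like:
 *
 *     int flags = 0;
 *     if (bdrv_parse_cache_flags("writeback", &flags) < 0) {
 *         // report an invalid cache= option
 *     }
 */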

/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}
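
/* Usage sketch (editor addition, not part of the original source): this is
 * the same pattern find_image_format() above uses to probe an image -- open
 * the file by protocol, read from it, then delete the BlockDriverState.  The
 * path is purely illustrative:
 *
 *     BlockDriverState *file;
 *     uint8_t header[512];
 *     if (bdrv_file_open(&file, "/tmp/test.img", 0) == 0) {
 *         bdrv_pread(file, 0, header, sizeof(header));
 *         bdrv_delete(file);
 *     }
 */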

/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
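
/* Usage sketch (editor addition, not part of the original source): opening a
 * disk image with format probing, roughly as callers of this function do.
 * The device name and file name are illustrative:
 *
 *     BlockDriverState *bs = bdrv_new("drive0");
 *     int ret = bdrv_open(bs, "disk.qcow2", BDRV_O_RDWR, NULL);
 *     if (ret < 0) {
 *         bdrv_delete(bs);    // open failed
 *     }
 *
 * Passing drv == NULL lets find_image_format() probe the image; passing
 * BDRV_O_SNAPSHOT instead redirects writes to a temporary qcow2 overlay as
 * implemented above.
 */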

void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;

    qemu_aio_flush();

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}

/* make a BlockDriverState anonymous by removing it from the bdrv_states list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}

int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
    }
}

bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}

void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}

bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}

static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}

bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
        return bs->dev_ops->is_medium_locked(bs->dev_opaque);
    }
    return false;
}

/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
}
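
/* Usage sketch (editor addition, not part of the original source): callers
 * pass a caller-owned BdrvCheckResult, which this function zeroes before
 * handing it to the format driver:
 *
 *     BdrvCheckResult result;
 *     int ret = bdrv_check(bs, &result);
 *     if (ret == 0) {
 *         // inspect result (fields are defined in the block layer headers)
 *     } else if (ret == -ENOTSUP) {
 *         // format driver has no consistency check
 *     }
 */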

#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BlockDriver *backing_drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0, rw_ret = 0;
    uint8_t *buf;
    char filename[1024];
    BlockDriverState *bs_rw, *bs_ro;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bs->backing_hd->keep_read_only) {
        return -EACCES;
    }

    backing_drv = bs->backing_hd->drv;
    ro = bs->backing_hd->read_only;
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* re-open as RW */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_rw = bdrv_new("");
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
            backing_drv);
        if (rw_ret < 0) {
            bdrv_delete(bs_rw);
            /* try to re-open read-only */
            bs_ro = bdrv_new("");
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                backing_drv);
            if (ret < 0) {
                bdrv_delete(bs_ro);
                /* drive not functional anymore */
                bs->drv = NULL;
                return ret;
            }
            bs->backing_hd = bs_ro;
            return rw_ret;
        }
        bs->backing_hd = bs_rw;
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    g_free(buf);

    if (ro) {
        /* re-open as RO */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_ro = bdrv_new("");
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
            backing_drv);
        if (ret < 0) {
            bdrv_delete(bs_ro);
            /* drive not functional anymore */
            bs->drv = NULL;
            return ret;
        }
        bs->backing_hd = bs_ro;
        bs->backing_hd->keep_read_only = 0;
    }

    return ret;
}

void bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_commit(bs);
    }
}

struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}

/**
 * Round a region to cluster boundaries
 */
static void round_to_clusters(BlockDriverState *bs,
                              int64_t sector_num, int nb_sectors,
                              int64_t *cluster_sector_num,
                              int *cluster_nb_sectors)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
    } else {
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
                                            nb_sectors, c);
    }
}
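
/* Worked example (editor addition, not part of the original source): assuming
 * bdrv_get_info() reports a 64 KB cluster size, c = 65536 / 512 = 128
 * sectors.  A request for sectors [130, 134) is then widened to the whole
 * cluster:
 *
 *     cluster_sector_num = QEMU_ALIGN_DOWN(130, 128)         = 128
 *     cluster_nb_sectors = QEMU_ALIGN_UP(130 - 128 + 4, 128) = 128
 *
 * i.e. the tracked region becomes sectors [128, 256).
 */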

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t sector_num, int nb_sectors) {
    /*        aaaa   bbbb */
    if (sector_num >= req->sector_num + req->nb_sectors) {
        return false;
    }
    /* bbbb   aaaa        */
    if (req->sector_num >= sector_num + nb_sectors) {
        return false;
    }
    return true;
}

static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap. This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster. For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}

/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;

    if (drv->bdrv_change_backing_file != NULL) {
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        return -ENOTSUP;
    }
}

static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}

typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov);
    }
}

/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}

/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}

static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
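
/* Illustrative note (editor addition, not part of the original source): the
 * dirty bitmap stores one bit per chunk of BDRV_SECTORS_PER_DIRTY_CHUNK
 * sectors (the constant is defined outside this file).  For a chunk number
 * 'start', the word and bit position are:
 *
 *     idx = start / (sizeof(unsigned long) * 8);   // which word
 *     bit = start % (sizeof(unsigned long) * 8);   // which bit in that word
 *
 * bs->dirty_count tracks how many chunks are currently marked dirty.
 */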
1368
ths5fafdf22007-09-16 21:08:06 +00001369/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001370 -EIO generic I/O error (may happen for all errors)
1371 -ENOMEDIUM No media inserted.
1372 -EINVAL Invalid sector number or nb_sectors
1373 -EACCES Trying to write a read-only device
1374*/
ths5fafdf22007-09-16 21:08:06 +00001375int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001376 const uint8_t *buf, int nb_sectors)
1377{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001378 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001379}
1380
aliguorieda578e2009-03-12 19:57:16 +00001381int bdrv_pread(BlockDriverState *bs, int64_t offset,
1382 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001383{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001384 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001385 int len, nb_sectors, count;
1386 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001387 int ret;
bellard83f64092006-08-01 16:21:11 +00001388
1389 count = count1;
1390 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001391 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001392 if (len > count)
1393 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001394 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001395 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001396 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1397 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001398 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001399 count -= len;
1400 if (count == 0)
1401 return count1;
1402 sector_num++;
1403 buf += len;
1404 }
1405
1406 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001407 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001408 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001409 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1410 return ret;
bellard83f64092006-08-01 16:21:11 +00001411 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001412 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001413 buf += len;
1414 count -= len;
1415 }
1416
1417 /* add data from the last sector */
1418 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001419 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1420 return ret;
bellard83f64092006-08-01 16:21:11 +00001421 memcpy(buf, tmp_buf, count);
1422 }
1423 return count1;
1424}
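/* Worked example (illustrative): with 512-byte sectors, bdrv_pread(bs, 700,
 * buf, 1300) is split into three phases: 324 bytes copied out of the tail of
 * sector 1 (bytes 700..1023), one full sector read "in place" for bytes
 * 1024..1535, and 464 bytes copied out of the head of sector 3 for bytes
 * 1536..1999.  bdrv_pwrite() below follows the same pattern but must
 * read-modify-write the partial head and tail sectors. */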
1425
aliguorieda578e2009-03-12 19:57:16 +00001426int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1427 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001428{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001429 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001430 int len, nb_sectors, count;
1431 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001432 int ret;
bellard83f64092006-08-01 16:21:11 +00001433
1434 count = count1;
1435 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001436 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001437 if (len > count)
1438 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001439 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001440 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001441 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1442 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001443 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001444 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1445 return ret;
bellard83f64092006-08-01 16:21:11 +00001446 count -= len;
1447 if (count == 0)
1448 return count1;
1449 sector_num++;
1450 buf += len;
1451 }
1452
1453 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001454 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001455 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001456 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1457 return ret;
bellard83f64092006-08-01 16:21:11 +00001458 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001459 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001460 buf += len;
1461 count -= len;
1462 }
1463
1464 /* add data from the last sector */
1465 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001466 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1467 return ret;
bellard83f64092006-08-01 16:21:11 +00001468 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001469 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1470 return ret;
bellard83f64092006-08-01 16:21:11 +00001471 }
1472 return count1;
1473}
bellard83f64092006-08-01 16:21:11 +00001474
Kevin Wolff08145f2010-06-16 16:38:15 +02001475/*
1476 * Writes to the file and ensures that no writes are reordered across this
1477 * request (acts as a barrier)
1478 *
1479 * Returns 0 on success, -errno in error cases.
1480 */
1481int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1482 const void *buf, int count)
1483{
1484 int ret;
1485
1486 ret = bdrv_pwrite(bs, offset, buf, count);
1487 if (ret < 0) {
1488 return ret;
1489 }
1490
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001491 /* No flush needed for cache modes that use O_DSYNC */
1492 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001493 bdrv_flush(bs);
1494 }
1495
1496 return 0;
1497}
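/* Illustrative usage sketch (not code from this file; update_header_u64() and
 * its offset are made-up names): a format driver that must persist a metadata
 * field before relying on it can combine the write and the ordering barrier in
 * one call:
 *
 *   static int update_header_u64(BlockDriverState *bs, int64_t offset,
 *                                uint64_t value)
 *   {
 *       uint64_t le = cpu_to_le64(value);
 *       return bdrv_pwrite_sync(bs->file, offset, &le, sizeof(le));
 *   }
 *
 * A negative return value is the bdrv_pwrite() error; on success the data has
 * also been flushed when the cache mode requires it. */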
1498
Stefan Hajnocziab185922011-11-17 13:40:31 +00001499static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1500 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1501{
1502 /* Perform I/O through a temporary buffer so that users who scribble over
1503 * their read buffer while the operation is in progress do not end up
1504 * modifying the image file. This is critical for zero-copy guest I/O
1505 * where anything might happen inside guest memory.
1506 */
1507 void *bounce_buffer;
1508
1509 struct iovec iov;
1510 QEMUIOVector bounce_qiov;
1511 int64_t cluster_sector_num;
1512 int cluster_nb_sectors;
1513 size_t skip_bytes;
1514 int ret;
1515
1516 /* Cover entire cluster so no additional backing file I/O is required when
1517 * allocating cluster in the image file.
1518 */
1519 round_to_clusters(bs, sector_num, nb_sectors,
1520 &cluster_sector_num, &cluster_nb_sectors);
1521
1522 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
1523 cluster_sector_num, cluster_nb_sectors);
1524
1525 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1526 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1527 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1528
1529 ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1530 &bounce_qiov);
1531 if (ret < 0) {
1532 goto err;
1533 }
1534
1535 ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
1536 &bounce_qiov);
1537 if (ret < 0) {
1538 /* It might be okay to ignore write errors for guest requests. If this
1539 * is a deliberate copy-on-read then we don't want to ignore the error.
1540 * Simply report it in all cases.
1541 */
1542 goto err;
1543 }
1544
1545 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1546 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1547 nb_sectors * BDRV_SECTOR_SIZE);
1548
1549err:
1550 qemu_vfree(bounce_buffer);
1551 return ret;
1552}
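/* Worked example (illustrative; assumes a 64 KiB cluster size, i.e. 128
 * sectors): a copy-on-read request for sectors 130..133 is rounded out to the
 * cluster range 128..255.  The whole cluster is read into the bounce buffer,
 * written back to the image, and skip_bytes = (130 - 128) * 512 = 1024 selects
 * the four requested sectors that are copied into the caller's qiov. */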
1553
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001554/*
1555 * Handle a read request in coroutine context
1556 */
1557static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1558 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001559{
1560 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001561 BdrvTrackedRequest req;
1562 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001563
Kevin Wolfda1fa912011-07-14 17:27:13 +02001564 if (!drv) {
1565 return -ENOMEDIUM;
1566 }
1567 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1568 return -EIO;
1569 }
1570
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001571 /* throttling disk read I/O */
1572 if (bs->io_limits_enabled) {
1573 bdrv_io_limits_intercept(bs, false, nb_sectors);
1574 }
1575
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001576 if (bs->copy_on_read) {
1577 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1578 }
1579
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001580 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001581
1582 if (bs->copy_on_read) {
1583 int pnum;
1584
1585 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1586 if (ret < 0) {
1587 goto out;
1588 }
1589
1590 if (!ret || pnum != nb_sectors) {
1591 ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
1592 goto out;
1593 }
1594 }
1595
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001596 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001597
1598out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001599 tracked_request_end(&req);
1600 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001601}
1602
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001603int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001604 int nb_sectors, QEMUIOVector *qiov)
1605{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001606 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001607
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001608 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1609}
1610
1611/*
1612 * Handle a write request in coroutine context
1613 */
1614static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1615 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1616{
1617 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001618 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001619 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001620
1621 if (!bs->drv) {
1622 return -ENOMEDIUM;
1623 }
1624 if (bs->read_only) {
1625 return -EACCES;
1626 }
1627 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1628 return -EIO;
1629 }
1630
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001631 /* throttling disk write I/O */
1632 if (bs->io_limits_enabled) {
1633 bdrv_io_limits_intercept(bs, true, nb_sectors);
1634 }
1635
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001636 if (bs->copy_on_read) {
1637 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1638 }
1639
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001640 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1641
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001642 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1643
Kevin Wolfda1fa912011-07-14 17:27:13 +02001644 if (bs->dirty_bitmap) {
1645 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1646 }
1647
1648 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1649 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1650 }
1651
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001652 tracked_request_end(&req);
1653
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001654 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001655}
1656
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001657int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1658 int nb_sectors, QEMUIOVector *qiov)
1659{
1660 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1661
1662 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1663}
1664
bellard83f64092006-08-01 16:21:11 +00001665/**
bellard83f64092006-08-01 16:21:11 +00001666 * Truncate file to 'offset' bytes (needed only for file protocols)
1667 */
1668int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1669{
1670 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001671 int ret;
bellard83f64092006-08-01 16:21:11 +00001672 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001673 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001674 if (!drv->bdrv_truncate)
1675 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001676 if (bs->read_only)
1677 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001678 if (bdrv_in_use(bs))
1679 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001680 ret = drv->bdrv_truncate(bs, offset);
1681 if (ret == 0) {
1682 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001683 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001684 }
1685 return ret;
bellard83f64092006-08-01 16:21:11 +00001686}
1687
1688/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001689 * Length of an allocated file in bytes. Sparse files are counted by actual
1690 * allocated space. Return < 0 if error or unknown.
1691 */
1692int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1693{
1694 BlockDriver *drv = bs->drv;
1695 if (!drv) {
1696 return -ENOMEDIUM;
1697 }
1698 if (drv->bdrv_get_allocated_file_size) {
1699 return drv->bdrv_get_allocated_file_size(bs);
1700 }
1701 if (bs->file) {
1702 return bdrv_get_allocated_file_size(bs->file);
1703 }
1704 return -ENOTSUP;
1705}
1706
1707/**
bellard83f64092006-08-01 16:21:11 +00001708 * Length of a file in bytes. Return < 0 if error or unknown.
1709 */
1710int64_t bdrv_getlength(BlockDriverState *bs)
1711{
1712 BlockDriver *drv = bs->drv;
1713 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001714 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001715
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001716 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001717 if (drv->bdrv_getlength) {
1718 return drv->bdrv_getlength(bs);
1719 }
bellard83f64092006-08-01 16:21:11 +00001720 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001721 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001722}
1723
bellard19cb3732006-08-19 11:45:59 +00001724/* return 0 as the number of sectors if no device is present or on error */
ths96b8f132007-12-17 01:35:20 +00001725void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001726{
bellard19cb3732006-08-19 11:45:59 +00001727 int64_t length;
1728 length = bdrv_getlength(bs);
1729 if (length < 0)
1730 length = 0;
1731 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001732 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001733 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001734}
bellardcf989512004-02-16 21:56:36 +00001735
aliguorif3d54fc2008-11-25 21:50:24 +00001736struct partition {
1737 uint8_t boot_ind; /* 0x80 - active */
1738 uint8_t head; /* starting head */
1739 uint8_t sector; /* starting sector */
1740 uint8_t cyl; /* starting cylinder */
1741 uint8_t sys_ind; /* What partition type */
1742 uint8_t end_head; /* end head */
1743 uint8_t end_sector; /* end sector */
1744 uint8_t end_cyl; /* end cylinder */
1745 uint32_t start_sect; /* starting sector counting from 0 */
1746 uint32_t nr_sects; /* nr of sectors in partition */
Stefan Weil541dc0d2011-08-31 12:38:01 +02001747} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001748
1749/* Try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if the geometry could not be guessed. */
1750static int guess_disk_lchs(BlockDriverState *bs,
1751 int *pcylinders, int *pheads, int *psectors)
1752{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001753 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001754 int ret, i, heads, sectors, cylinders;
1755 struct partition *p;
1756 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001757 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001758
1759 bdrv_get_geometry(bs, &nb_sectors);
1760
1761 ret = bdrv_read(bs, 0, buf, 1);
1762 if (ret < 0)
1763 return -1;
1764 /* test msdos magic */
1765 if (buf[510] != 0x55 || buf[511] != 0xaa)
1766 return -1;
1767 for(i = 0; i < 4; i++) {
1768 p = ((struct partition *)(buf + 0x1be)) + i;
1769 nr_sects = le32_to_cpu(p->nr_sects);
1770 if (nr_sects && p->end_head) {
1771 /* We make the assumption that the partition terminates on
1772 a cylinder boundary */
1773 heads = p->end_head + 1;
1774 sectors = p->end_sector & 63;
1775 if (sectors == 0)
1776 continue;
1777 cylinders = nb_sectors / (heads * sectors);
1778 if (cylinders < 1 || cylinders > 16383)
1779 continue;
1780 *pheads = heads;
1781 *psectors = sectors;
1782 *pcylinders = cylinders;
1783#if 0
1784 printf("guessed geometry: LCHS=%d %d %d\n",
1785 cylinders, heads, sectors);
1786#endif
1787 return 0;
1788 }
1789 }
1790 return -1;
1791}
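/* Worked example (illustrative): for a 1 GiB image (2097152 sectors) whose
 * first partition entry ends at head 15, sector 63, the guess is heads = 16,
 * sectors = 63 and cylinders = 2097152 / (16 * 63) = 2080, which passes the
 * 1..16383 sanity check, so the LCHS geometry 2080/16/63 is returned. */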
1792
1793void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1794{
1795 int translation, lba_detected = 0;
1796 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00001797 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001798
1799 /* if a geometry hint is available, use it */
1800 bdrv_get_geometry(bs, &nb_sectors);
1801 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1802 translation = bdrv_get_translation_hint(bs);
1803 if (cylinders != 0) {
1804 *pcyls = cylinders;
1805 *pheads = heads;
1806 *psecs = secs;
1807 } else {
1808 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1809 if (heads > 16) {
1810 /* if heads > 16, it means that a BIOS LBA
1811 translation was active, so the default
1812 hardware geometry is OK */
1813 lba_detected = 1;
1814 goto default_geometry;
1815 } else {
1816 *pcyls = cylinders;
1817 *pheads = heads;
1818 *psecs = secs;
1819 /* disable any translation to be in sync with
1820 the logical geometry */
1821 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1822 bdrv_set_translation_hint(bs,
1823 BIOS_ATA_TRANSLATION_NONE);
1824 }
1825 }
1826 } else {
1827 default_geometry:
1828 /* if no geometry, use a standard physical disk geometry */
1829 cylinders = nb_sectors / (16 * 63);
1830
1831 if (cylinders > 16383)
1832 cylinders = 16383;
1833 else if (cylinders < 2)
1834 cylinders = 2;
1835 *pcyls = cylinders;
1836 *pheads = 16;
1837 *psecs = 63;
1838 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1839 if ((*pcyls * *pheads) <= 131072) {
1840 bdrv_set_translation_hint(bs,
1841 BIOS_ATA_TRANSLATION_LARGE);
1842 } else {
1843 bdrv_set_translation_hint(bs,
1844 BIOS_ATA_TRANSLATION_LBA);
1845 }
1846 }
1847 }
1848 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1849 }
1850}
1851
ths5fafdf22007-09-16 21:08:06 +00001852void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001853 int cyls, int heads, int secs)
1854{
1855 bs->cyls = cyls;
1856 bs->heads = heads;
1857 bs->secs = secs;
1858}
1859
bellard46d47672004-11-16 01:45:27 +00001860void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1861{
1862 bs->translation = translation;
1863}
1864
ths5fafdf22007-09-16 21:08:06 +00001865void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001866 int *pcyls, int *pheads, int *psecs)
1867{
1868 *pcyls = bs->cyls;
1869 *pheads = bs->heads;
1870 *psecs = bs->secs;
1871}
1872
Zhi Yong Wu0563e192011-11-03 16:57:25 +08001873/* throttling disk io limits */
1874void bdrv_set_io_limits(BlockDriverState *bs,
1875 BlockIOLimit *io_limits)
1876{
1877 bs->io_limits = *io_limits;
1878 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1879}
1880
Blue Swirl5bbdbb42011-02-12 20:43:32 +00001881/* Recognize floppy formats */
1882typedef struct FDFormat {
1883 FDriveType drive;
1884 uint8_t last_sect;
1885 uint8_t max_track;
1886 uint8_t max_head;
1887} FDFormat;
1888
1889static const FDFormat fd_formats[] = {
1890 /* First entry is default format */
1891 /* 1.44 MB 3"1/2 floppy disks */
1892 { FDRIVE_DRV_144, 18, 80, 1, },
1893 { FDRIVE_DRV_144, 20, 80, 1, },
1894 { FDRIVE_DRV_144, 21, 80, 1, },
1895 { FDRIVE_DRV_144, 21, 82, 1, },
1896 { FDRIVE_DRV_144, 21, 83, 1, },
1897 { FDRIVE_DRV_144, 22, 80, 1, },
1898 { FDRIVE_DRV_144, 23, 80, 1, },
1899 { FDRIVE_DRV_144, 24, 80, 1, },
1900 /* 2.88 MB 3"1/2 floppy disks */
1901 { FDRIVE_DRV_288, 36, 80, 1, },
1902 { FDRIVE_DRV_288, 39, 80, 1, },
1903 { FDRIVE_DRV_288, 40, 80, 1, },
1904 { FDRIVE_DRV_288, 44, 80, 1, },
1905 { FDRIVE_DRV_288, 48, 80, 1, },
1906 /* 720 kB 3"1/2 floppy disks */
1907 { FDRIVE_DRV_144, 9, 80, 1, },
1908 { FDRIVE_DRV_144, 10, 80, 1, },
1909 { FDRIVE_DRV_144, 10, 82, 1, },
1910 { FDRIVE_DRV_144, 10, 83, 1, },
1911 { FDRIVE_DRV_144, 13, 80, 1, },
1912 { FDRIVE_DRV_144, 14, 80, 1, },
1913 /* 1.2 MB 5"1/4 floppy disks */
1914 { FDRIVE_DRV_120, 15, 80, 1, },
1915 { FDRIVE_DRV_120, 18, 80, 1, },
1916 { FDRIVE_DRV_120, 18, 82, 1, },
1917 { FDRIVE_DRV_120, 18, 83, 1, },
1918 { FDRIVE_DRV_120, 20, 80, 1, },
1919 /* 720 kB 5"1/4 floppy disks */
1920 { FDRIVE_DRV_120, 9, 80, 1, },
1921 { FDRIVE_DRV_120, 11, 80, 1, },
1922 /* 360 kB 5"1/4 floppy disks */
1923 { FDRIVE_DRV_120, 9, 40, 1, },
1924 { FDRIVE_DRV_120, 9, 40, 0, },
1925 { FDRIVE_DRV_120, 10, 41, 1, },
1926 { FDRIVE_DRV_120, 10, 42, 1, },
1927 /* 320 kB 5"1/4 floppy disks */
1928 { FDRIVE_DRV_120, 8, 40, 1, },
1929 { FDRIVE_DRV_120, 8, 40, 0, },
1930 /* 360 kB must match 5"1/4 better than 3"1/2... */
1931 { FDRIVE_DRV_144, 9, 80, 0, },
1932 /* end */
1933 { FDRIVE_DRV_NONE, -1, -1, 0, },
1934};
1935
1936void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1937 int *max_track, int *last_sect,
1938 FDriveType drive_in, FDriveType *drive)
1939{
1940 const FDFormat *parse;
1941 uint64_t nb_sectors, size;
1942 int i, first_match, match;
1943
1944 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1945 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1946 /* User defined disk */
1947 } else {
1948 bdrv_get_geometry(bs, &nb_sectors);
1949 match = -1;
1950 first_match = -1;
1951 for (i = 0; ; i++) {
1952 parse = &fd_formats[i];
1953 if (parse->drive == FDRIVE_DRV_NONE) {
1954 break;
1955 }
1956 if (drive_in == parse->drive ||
1957 drive_in == FDRIVE_DRV_NONE) {
1958 size = (parse->max_head + 1) * parse->max_track *
1959 parse->last_sect;
1960 if (nb_sectors == size) {
1961 match = i;
1962 break;
1963 }
1964 if (first_match == -1) {
1965 first_match = i;
1966 }
1967 }
1968 }
1969 if (match == -1) {
1970 if (first_match == -1) {
1971 match = 1;
1972 } else {
1973 match = first_match;
1974 }
1975 parse = &fd_formats[match];
1976 }
1977 *nb_heads = parse->max_head + 1;
1978 *max_track = parse->max_track;
1979 *last_sect = parse->last_sect;
1980 *drive = parse->drive;
1981 }
1982}
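/* Worked example (illustrative): a raw floppy image of 2880 sectors with no
 * geometry hint and drive_in == FDRIVE_DRV_NONE matches the first table entry,
 * since (1 + 1) * 80 * 18 == 2880, so the guessed geometry is 2 heads,
 * 80 tracks and 18 sectors/track on a 1.44 MB drive. */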
1983
bellard46d47672004-11-16 01:45:27 +00001984int bdrv_get_translation_hint(BlockDriverState *bs)
1985{
1986 return bs->translation;
1987}
1988
Markus Armbrusterabd7f682010-06-02 18:55:17 +02001989void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1990 BlockErrorAction on_write_error)
1991{
1992 bs->on_read_error = on_read_error;
1993 bs->on_write_error = on_write_error;
1994}
1995
1996BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1997{
1998 return is_read ? bs->on_read_error : bs->on_write_error;
1999}
2000
bellardb3380822004-03-14 21:38:54 +00002001int bdrv_is_read_only(BlockDriverState *bs)
2002{
2003 return bs->read_only;
2004}
2005
ths985a03b2007-12-24 16:10:43 +00002006int bdrv_is_sg(BlockDriverState *bs)
2007{
2008 return bs->sg;
2009}
2010
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002011int bdrv_enable_write_cache(BlockDriverState *bs)
2012{
2013 return bs->enable_write_cache;
2014}
2015
bellardea2384d2004-08-01 21:59:26 +00002016int bdrv_is_encrypted(BlockDriverState *bs)
2017{
2018 if (bs->backing_hd && bs->backing_hd->encrypted)
2019 return 1;
2020 return bs->encrypted;
2021}
2022
aliguoric0f4ce72009-03-05 23:01:01 +00002023int bdrv_key_required(BlockDriverState *bs)
2024{
2025 BlockDriverState *backing_hd = bs->backing_hd;
2026
2027 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2028 return 1;
2029 return (bs->encrypted && !bs->valid_key);
2030}
2031
bellardea2384d2004-08-01 21:59:26 +00002032int bdrv_set_key(BlockDriverState *bs, const char *key)
2033{
2034 int ret;
2035 if (bs->backing_hd && bs->backing_hd->encrypted) {
2036 ret = bdrv_set_key(bs->backing_hd, key);
2037 if (ret < 0)
2038 return ret;
2039 if (!bs->encrypted)
2040 return 0;
2041 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002042 if (!bs->encrypted) {
2043 return -EINVAL;
2044 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2045 return -ENOMEDIUM;
2046 }
aliguoric0f4ce72009-03-05 23:01:01 +00002047 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002048 if (ret < 0) {
2049 bs->valid_key = 0;
2050 } else if (!bs->valid_key) {
2051 bs->valid_key = 1;
2052 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002053 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002054 }
aliguoric0f4ce72009-03-05 23:01:01 +00002055 return ret;
bellardea2384d2004-08-01 21:59:26 +00002056}
2057
2058void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2059{
bellard19cb3732006-08-19 11:45:59 +00002060 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002061 buf[0] = '\0';
2062 } else {
2063 pstrcpy(buf, buf_size, bs->drv->format_name);
2064 }
2065}
2066
ths5fafdf22007-09-16 21:08:06 +00002067void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002068 void *opaque)
2069{
2070 BlockDriver *drv;
2071
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002072 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002073 it(opaque, drv->format_name);
2074 }
2075}
2076
bellardb3380822004-03-14 21:38:54 +00002077BlockDriverState *bdrv_find(const char *name)
2078{
2079 BlockDriverState *bs;
2080
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002081 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2082 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002083 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002084 }
bellardb3380822004-03-14 21:38:54 +00002085 }
2086 return NULL;
2087}
2088
Markus Armbruster2f399b02010-06-02 18:55:20 +02002089BlockDriverState *bdrv_next(BlockDriverState *bs)
2090{
2091 if (!bs) {
2092 return QTAILQ_FIRST(&bdrv_states);
2093 }
2094 return QTAILQ_NEXT(bs, list);
2095}
2096
aliguori51de9762009-03-05 23:00:43 +00002097void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002098{
2099 BlockDriverState *bs;
2100
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002101 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002102 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002103 }
2104}
2105
bellardea2384d2004-08-01 21:59:26 +00002106const char *bdrv_get_device_name(BlockDriverState *bs)
2107{
2108 return bs->device_name;
2109}
2110
aliguoric6ca28d2008-10-06 13:55:43 +00002111void bdrv_flush_all(void)
2112{
2113 BlockDriverState *bs;
2114
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002115 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Markus Armbrusterc602a482011-08-03 15:08:10 +02002116 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
aliguoric6ca28d2008-10-06 13:55:43 +00002117 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002118 }
2119 }
aliguoric6ca28d2008-10-06 13:55:43 +00002120}
2121
Kevin Wolff2feebb2010-04-14 17:30:35 +02002122int bdrv_has_zero_init(BlockDriverState *bs)
2123{
2124 assert(bs->drv);
2125
Kevin Wolf336c1c12010-07-28 11:26:29 +02002126 if (bs->drv->bdrv_has_zero_init) {
2127 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002128 }
2129
2130 return 1;
2131}
2132
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002133typedef struct BdrvCoIsAllocatedData {
2134 BlockDriverState *bs;
2135 int64_t sector_num;
2136 int nb_sectors;
2137 int *pnum;
2138 int ret;
2139 bool done;
2140} BdrvCoIsAllocatedData;
2141
thsf58c7b32008-06-05 21:53:49 +00002142/*
2143 * Returns true iff the specified sector is present in the disk image. Drivers
2144 * not implementing the functionality are assumed to not support backing files,
2145 * hence all their sectors are reported as allocated.
2146 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002147 * If 'sector_num' is beyond the end of the disk image the return value is 0
2148 * and 'pnum' is set to 0.
2149 *
thsf58c7b32008-06-05 21:53:49 +00002150 * 'pnum' is set to the number of sectors (including and immediately following
2151 * the specified sector) that are known to be in the same
2152 * allocated/unallocated state.
2153 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002154 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2155 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002156 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002157int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2158 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002159{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002160 int64_t n;
2161
2162 if (sector_num >= bs->total_sectors) {
2163 *pnum = 0;
2164 return 0;
2165 }
2166
2167 n = bs->total_sectors - sector_num;
2168 if (n < nb_sectors) {
2169 nb_sectors = n;
2170 }
2171
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002172 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002173 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002174 return 1;
2175 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002176
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002177 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2178}
2179
2180/* Coroutine wrapper for bdrv_is_allocated() */
2181static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2182{
2183 BdrvCoIsAllocatedData *data = opaque;
2184 BlockDriverState *bs = data->bs;
2185
2186 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2187 data->pnum);
2188 data->done = true;
2189}
2190
2191/*
2192 * Synchronous wrapper around bdrv_co_is_allocated().
2193 *
2194 * See bdrv_co_is_allocated() for details.
2195 */
2196int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2197 int *pnum)
2198{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002199 Coroutine *co;
2200 BdrvCoIsAllocatedData data = {
2201 .bs = bs,
2202 .sector_num = sector_num,
2203 .nb_sectors = nb_sectors,
2204 .pnum = pnum,
2205 .done = false,
2206 };
2207
2208 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2209 qemu_coroutine_enter(co, &data);
2210 while (!data.done) {
2211 qemu_aio_wait();
2212 }
2213 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002214}
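/* Illustrative usage sketch (not code from this file): walking an image and
 * reporting which sector ranges are allocated.  The 65536-sector step is an
 * arbitrary upper bound for a single query.
 *
 *   int64_t sector = 0, total = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
 *   while (sector < total) {
 *       int pnum, n = total - sector < 65536 ? total - sector : 65536;
 *       int allocated = bdrv_is_allocated(bs, sector, n, &pnum);
 *       // process [sector, sector + pnum) as allocated or unallocated
 *       sector += pnum;
 *   }
 */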
2215
Luiz Capitulino2582bfe2010-02-03 12:41:01 -02002216void bdrv_mon_event(const BlockDriverState *bdrv,
2217 BlockMonEventAction action, int is_read)
2218{
2219 QObject *data;
2220 const char *action_str;
2221
2222 switch (action) {
2223 case BDRV_ACTION_REPORT:
2224 action_str = "report";
2225 break;
2226 case BDRV_ACTION_IGNORE:
2227 action_str = "ignore";
2228 break;
2229 case BDRV_ACTION_STOP:
2230 action_str = "stop";
2231 break;
2232 default:
2233 abort();
2234 }
2235
2236 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2237 bdrv->device_name,
2238 action_str,
2239 is_read ? "read" : "write");
2240 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2241
2242 qobject_decref(data);
2243}
2244
Luiz Capitulinob2023812011-09-21 17:16:47 -03002245BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002246{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002247 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002248 BlockDriverState *bs;
2249
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002250 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002251 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002252
Luiz Capitulinob2023812011-09-21 17:16:47 -03002253 info->value = g_malloc0(sizeof(*info->value));
2254 info->value->device = g_strdup(bs->device_name);
2255 info->value->type = g_strdup("unknown");
2256 info->value->locked = bdrv_dev_is_medium_locked(bs);
2257 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002258
Markus Armbrustere4def802011-09-06 18:58:53 +02002259 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002260 info->value->has_tray_open = true;
2261 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002262 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002263
2264 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002265 info->value->has_io_status = true;
2266 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002267 }
2268
bellard19cb3732006-08-19 11:45:59 +00002269 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002270 info->value->has_inserted = true;
2271 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2272 info->value->inserted->file = g_strdup(bs->filename);
2273 info->value->inserted->ro = bs->read_only;
2274 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2275 info->value->inserted->encrypted = bs->encrypted;
2276 if (bs->backing_file[0]) {
2277 info->value->inserted->has_backing_file = true;
2278 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002279 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002280
2281 if (bs->io_limits_enabled) {
2282 info->value->inserted->bps =
2283 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2284 info->value->inserted->bps_rd =
2285 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2286 info->value->inserted->bps_wr =
2287 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2288 info->value->inserted->iops =
2289 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2290 info->value->inserted->iops_rd =
2291 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2292 info->value->inserted->iops_wr =
2293 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2294 }
bellardb3380822004-03-14 21:38:54 +00002295 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002296
2297 /* XXX: waiting for the qapi to support GSList */
2298 if (!cur_item) {
2299 head = cur_item = info;
2300 } else {
2301 cur_item->next = info;
2302 cur_item = info;
2303 }
bellardb3380822004-03-14 21:38:54 +00002304 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002305
Luiz Capitulinob2023812011-09-21 17:16:47 -03002306 return head;
bellardb3380822004-03-14 21:38:54 +00002307}
thsa36e69d2007-12-02 05:18:19 +00002308
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002309/* Consider exposing this as a full-fledged QMP command */
2310static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002311{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002312 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002313
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002314 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002315
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002316 if (bs->device_name[0]) {
2317 s->has_device = true;
2318 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002319 }
2320
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002321 s->stats = g_malloc0(sizeof(*s->stats));
2322 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2323 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2324 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2325 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2326 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2327 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2328 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2329 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2330 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2331
Kevin Wolf294cc352010-04-28 14:34:01 +02002332 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002333 s->has_parent = true;
2334 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002335 }
2336
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002337 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002338}
2339
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002340BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002341{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002342 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002343 BlockDriverState *bs;
2344
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002345 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002346 BlockStatsList *info = g_malloc0(sizeof(*info));
2347 info->value = qmp_query_blockstat(bs, NULL);
2348
2349 /* XXX: waiting for the qapi to support GSList */
2350 if (!cur_item) {
2351 head = cur_item = info;
2352 } else {
2353 cur_item->next = info;
2354 cur_item = info;
2355 }
thsa36e69d2007-12-02 05:18:19 +00002356 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002357
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002358 return head;
thsa36e69d2007-12-02 05:18:19 +00002359}
bellardea2384d2004-08-01 21:59:26 +00002360
aliguori045df332009-03-05 23:00:48 +00002361const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2362{
2363 if (bs->backing_hd && bs->backing_hd->encrypted)
2364 return bs->backing_file;
2365 else if (bs->encrypted)
2366 return bs->filename;
2367 else
2368 return NULL;
2369}
2370
ths5fafdf22007-09-16 21:08:06 +00002371void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002372 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002373{
Kevin Wolf3574c602011-10-26 11:02:11 +02002374 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002375}
2376
ths5fafdf22007-09-16 21:08:06 +00002377int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002378 const uint8_t *buf, int nb_sectors)
2379{
2380 BlockDriver *drv = bs->drv;
2381 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002382 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002383 if (!drv->bdrv_write_compressed)
2384 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002385 if (bdrv_check_request(bs, sector_num, nb_sectors))
2386 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002387
Jan Kiszkac6d22832009-11-30 18:21:20 +01002388 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002389 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2390 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002391
bellardfaea38e2006-08-05 21:31:00 +00002392 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2393}
ths3b46e622007-09-17 08:09:54 +00002394
bellardfaea38e2006-08-05 21:31:00 +00002395int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2396{
2397 BlockDriver *drv = bs->drv;
2398 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002399 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002400 if (!drv->bdrv_get_info)
2401 return -ENOTSUP;
2402 memset(bdi, 0, sizeof(*bdi));
2403 return drv->bdrv_get_info(bs, bdi);
2404}
2405
Christoph Hellwig45566e92009-07-10 23:11:57 +02002406int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2407 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002408{
2409 BlockDriver *drv = bs->drv;
2410 if (!drv)
2411 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002412 if (drv->bdrv_save_vmstate)
2413 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2414 if (bs->file)
2415 return bdrv_save_vmstate(bs->file, buf, pos, size);
2416 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002417}
2418
Christoph Hellwig45566e92009-07-10 23:11:57 +02002419int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2420 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002421{
2422 BlockDriver *drv = bs->drv;
2423 if (!drv)
2424 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002425 if (drv->bdrv_load_vmstate)
2426 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2427 if (bs->file)
2428 return bdrv_load_vmstate(bs->file, buf, pos, size);
2429 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002430}
2431
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002432void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2433{
2434 BlockDriver *drv = bs->drv;
2435
2436 if (!drv || !drv->bdrv_debug_event) {
2437 return;
2438 }
2439
2440 return drv->bdrv_debug_event(bs, event);
2441
2442}
2443
bellardfaea38e2006-08-05 21:31:00 +00002444/**************************************************************/
2445/* handling of snapshots */
2446
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002447int bdrv_can_snapshot(BlockDriverState *bs)
2448{
2449 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002450 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002451 return 0;
2452 }
2453
2454 if (!drv->bdrv_snapshot_create) {
2455 if (bs->file != NULL) {
2456 return bdrv_can_snapshot(bs->file);
2457 }
2458 return 0;
2459 }
2460
2461 return 1;
2462}
2463
Blue Swirl199630b2010-07-25 20:49:34 +00002464int bdrv_is_snapshot(BlockDriverState *bs)
2465{
2466 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2467}
2468
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002469BlockDriverState *bdrv_snapshots(void)
2470{
2471 BlockDriverState *bs;
2472
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002473 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002474 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002475 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002476
2477 bs = NULL;
2478 while ((bs = bdrv_next(bs))) {
2479 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002480 bs_snapshots = bs;
2481 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002482 }
2483 }
2484 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002485}
2486
ths5fafdf22007-09-16 21:08:06 +00002487int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002488 QEMUSnapshotInfo *sn_info)
2489{
2490 BlockDriver *drv = bs->drv;
2491 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002492 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002493 if (drv->bdrv_snapshot_create)
2494 return drv->bdrv_snapshot_create(bs, sn_info);
2495 if (bs->file)
2496 return bdrv_snapshot_create(bs->file, sn_info);
2497 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002498}
2499
ths5fafdf22007-09-16 21:08:06 +00002500int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002501 const char *snapshot_id)
2502{
2503 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002504 int ret, open_ret;
2505
bellardfaea38e2006-08-05 21:31:00 +00002506 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002507 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002508 if (drv->bdrv_snapshot_goto)
2509 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2510
2511 if (bs->file) {
2512 drv->bdrv_close(bs);
2513 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2514 open_ret = drv->bdrv_open(bs, bs->open_flags);
2515 if (open_ret < 0) {
2516 bdrv_delete(bs->file);
2517 bs->drv = NULL;
2518 return open_ret;
2519 }
2520 return ret;
2521 }
2522
2523 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002524}
2525
2526int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2527{
2528 BlockDriver *drv = bs->drv;
2529 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002530 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002531 if (drv->bdrv_snapshot_delete)
2532 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2533 if (bs->file)
2534 return bdrv_snapshot_delete(bs->file, snapshot_id);
2535 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002536}
2537
ths5fafdf22007-09-16 21:08:06 +00002538int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002539 QEMUSnapshotInfo **psn_info)
2540{
2541 BlockDriver *drv = bs->drv;
2542 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002543 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002544 if (drv->bdrv_snapshot_list)
2545 return drv->bdrv_snapshot_list(bs, psn_info);
2546 if (bs->file)
2547 return bdrv_snapshot_list(bs->file, psn_info);
2548 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002549}
2550
edison51ef6722010-09-21 19:58:41 -07002551int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2552 const char *snapshot_name)
2553{
2554 BlockDriver *drv = bs->drv;
2555 if (!drv) {
2556 return -ENOMEDIUM;
2557 }
2558 if (!bs->read_only) {
2559 return -EINVAL;
2560 }
2561 if (drv->bdrv_snapshot_load_tmp) {
2562 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2563 }
2564 return -ENOTSUP;
2565}
2566
bellardfaea38e2006-08-05 21:31:00 +00002567#define NB_SUFFIXES 4
2568
2569char *get_human_readable_size(char *buf, int buf_size, int64_t size)
2570{
2571 static const char suffixes[NB_SUFFIXES] = "KMGT";
2572 int64_t base;
2573 int i;
2574
2575 if (size <= 999) {
2576 snprintf(buf, buf_size, "%" PRId64, size);
2577 } else {
2578 base = 1024;
2579 for(i = 0; i < NB_SUFFIXES; i++) {
2580 if (size < (10 * base)) {
ths5fafdf22007-09-16 21:08:06 +00002581 snprintf(buf, buf_size, "%0.1f%c",
bellardfaea38e2006-08-05 21:31:00 +00002582 (double)size / base,
2583 suffixes[i]);
2584 break;
2585 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
ths5fafdf22007-09-16 21:08:06 +00002586 snprintf(buf, buf_size, "%" PRId64 "%c",
bellardfaea38e2006-08-05 21:31:00 +00002587 ((size + (base >> 1)) / base),
2588 suffixes[i]);
2589 break;
2590 }
2591 base = base * 1024;
2592 }
2593 }
2594 return buf;
2595}
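/* Examples (illustrative): 999 -> "999", 1536 -> "1.5K", 1048576 -> "1.0M".
 * Sizes below ten units of the current suffix keep one decimal place; larger
 * sizes are rounded to a whole number of units. */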
2596
2597char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2598{
2599 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002600#ifdef _WIN32
2601 struct tm *ptm;
2602#else
bellardfaea38e2006-08-05 21:31:00 +00002603 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002604#endif
bellardfaea38e2006-08-05 21:31:00 +00002605 time_t ti;
2606 int64_t secs;
2607
2608 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002609 snprintf(buf, buf_size,
2610 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002611 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2612 } else {
2613 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002614#ifdef _WIN32
2615 ptm = localtime(&ti);
2616 strftime(date_buf, sizeof(date_buf),
2617 "%Y-%m-%d %H:%M:%S", ptm);
2618#else
bellardfaea38e2006-08-05 21:31:00 +00002619 localtime_r(&ti, &tm);
2620 strftime(date_buf, sizeof(date_buf),
2621 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002622#endif
bellardfaea38e2006-08-05 21:31:00 +00002623 secs = sn->vm_clock_nsec / 1000000000;
2624 snprintf(clock_buf, sizeof(clock_buf),
2625 "%02d:%02d:%02d.%03d",
2626 (int)(secs / 3600),
2627 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002628 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002629 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2630 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002631 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002632 sn->id_str, sn->name,
2633 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2634 date_buf,
2635 clock_buf);
2636 }
2637 return buf;
2638}
2639
bellard83f64092006-08-01 16:21:11 +00002640/**************************************************************/
2641/* async I/Os */
2642
aliguori3b69e4b2009-01-22 16:59:24 +00002643BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002644 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002645 BlockDriverCompletionFunc *cb, void *opaque)
2646{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002647 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2648
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002649 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002650 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002651}
2652
aliguorif141eaf2009-04-07 18:43:24 +00002653BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2654 QEMUIOVector *qiov, int nb_sectors,
2655 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002656{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002657 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2658
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002659 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002660 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002661}
2662
Kevin Wolf40b4f532009-09-09 17:53:37 +02002663
2664typedef struct MultiwriteCB {
2665 int error;
2666 int num_requests;
2667 int num_callbacks;
2668 struct {
2669 BlockDriverCompletionFunc *cb;
2670 void *opaque;
2671 QEMUIOVector *free_qiov;
2672 void *free_buf;
2673 } callbacks[];
2674} MultiwriteCB;
2675
2676static void multiwrite_user_cb(MultiwriteCB *mcb)
2677{
2678 int i;
2679
2680 for (i = 0; i < mcb->num_callbacks; i++) {
2681 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002682 if (mcb->callbacks[i].free_qiov) {
2683 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2684 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002685 g_free(mcb->callbacks[i].free_qiov);
Herve Poussineauf8a83242010-01-24 21:23:56 +00002686 qemu_vfree(mcb->callbacks[i].free_buf);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002687 }
2688}
2689
2690static void multiwrite_cb(void *opaque, int ret)
2691{
2692 MultiwriteCB *mcb = opaque;
2693
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002694 trace_multiwrite_cb(mcb, ret);
2695
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002696 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002697 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002698 }
2699
2700 mcb->num_requests--;
2701 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002702 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002703 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002704 }
2705}
2706
2707static int multiwrite_req_compare(const void *a, const void *b)
2708{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002709 const BlockRequest *req1 = a, *req2 = b;
2710
2711 /*
2712 * Note that we can't simply subtract req2->sector from req1->sector
2713 * here as that could overflow the return value.
2714 */
2715 if (req1->sector > req2->sector) {
2716 return 1;
2717 } else if (req1->sector < req2->sector) {
2718 return -1;
2719 } else {
2720 return 0;
2721 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002722}
2723
2724/*
2725 * Takes a bunch of requests and tries to merge them. Returns the number of
2726 * requests that remain after merging.
2727 */
2728static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2729 int num_reqs, MultiwriteCB *mcb)
2730{
2731 int i, outidx;
2732
2733 // Sort requests by start sector
2734 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2735
2736 // Check if adjacent requests touch the same clusters. If so, combine them,
2737 // filling up gaps with zero sectors.
2738 outidx = 0;
2739 for (i = 1; i < num_reqs; i++) {
2740 int merge = 0;
2741 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2742
2743 // This handles the cases that are valid for all block drivers, namely
2744 // exactly sequential writes and overlapping writes.
2745 if (reqs[i].sector <= oldreq_last) {
2746 merge = 1;
2747 }
2748
2749 // The block driver may decide that it makes sense to combine requests
2750 // even if there is a gap of some sectors between them. In this case,
 2751 // the gap is filled with zeros (therefore only applicable for
 2752 // not-yet-used space in formats such as qcow2).
2753 if (!merge && bs->drv->bdrv_merge_requests) {
2754 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2755 }
2756
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002757 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2758 merge = 0;
2759 }
2760
Kevin Wolf40b4f532009-09-09 17:53:37 +02002761 if (merge) {
2762 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002763 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002764 qemu_iovec_init(qiov,
2765 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2766
2767 // Add the first request to the merged one. If the requests are
2768 // overlapping, drop the last sectors of the first request.
2769 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2770 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2771
2772 // We might need to add some zeros between the two requests
2773 if (reqs[i].sector > oldreq_last) {
2774 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2775 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2776 memset(buf, 0, zero_bytes);
2777 qemu_iovec_add(qiov, buf, zero_bytes);
2778 mcb->callbacks[i].free_buf = buf;
2779 }
2780
2781 // Add the second request
2782 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2783
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002784 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002785 reqs[outidx].qiov = qiov;
2786
2787 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2788 } else {
2789 outidx++;
2790 reqs[outidx].sector = reqs[i].sector;
2791 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2792 reqs[outidx].qiov = reqs[i].qiov;
2793 }
2794 }
2795
2796 return outidx + 1;
2797}
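/* Worked example (illustrative): two write requests for sectors [0, 8) and
 * [6, 10) arrive already sorted.  Since 6 <= oldreq_last (8) they are merged:
 * the first 6 sectors of request 0 are kept, the 2 overlapping sectors are
 * dropped, and request 1's iovec is appended, giving one 10-sector request
 * covering sectors 0..9.  A request starting at sector 12 would only be merged
 * if the driver's bdrv_merge_requests callback accepts the 2-sector gap. */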
2798
2799/*
2800 * Submit multiple AIO write requests at once.
2801 *
2802 * On success, the function returns 0 and all requests in the reqs array have
 2803 * been submitted. On error, this function returns -1 and any of the
 2804 * requests may or may not have been submitted. In particular, this means that the
2805 * callback will be called for some of the requests, for others it won't. The
2806 * caller must check the error field of the BlockRequest to wait for the right
2807 * callbacks (if error != 0, no callback will be called).
2808 *
2809 * The implementation may modify the contents of the reqs array, e.g. to merge
2810 * requests. However, the fields opaque and error are left unmodified as they
2811 * are used to signal failure for a single request to the caller.
2812 */
2813int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2814{
2815 BlockDriverAIOCB *acb;
2816 MultiwriteCB *mcb;
2817 int i;
2818
Ryan Harper301db7c2011-03-07 10:01:04 -06002819 /* don't submit writes if we don't have a medium */
2820 if (bs->drv == NULL) {
2821 for (i = 0; i < num_reqs; i++) {
2822 reqs[i].error = -ENOMEDIUM;
2823 }
2824 return -1;
2825 }
2826
Kevin Wolf40b4f532009-09-09 17:53:37 +02002827 if (num_reqs == 0) {
2828 return 0;
2829 }
2830
2831 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05002832 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002833 mcb->num_requests = 0;
2834 mcb->num_callbacks = num_reqs;
2835
2836 for (i = 0; i < num_reqs; i++) {
2837 mcb->callbacks[i].cb = reqs[i].cb;
2838 mcb->callbacks[i].opaque = reqs[i].opaque;
2839 }
2840
 2841 // Check for mergeable requests
2842 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2843
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002844 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2845
Kevin Wolf453f9a12010-07-02 14:01:21 +02002846 /*
2847 * Run the aio requests. As soon as one request can't be submitted
2848 * successfully, fail all requests that are not yet submitted (we must
2849 * return failure for all requests anyway)
2850 *
2851 * num_requests cannot be set to the right value immediately: If
2852 * bdrv_aio_writev fails for some request, num_requests would be too high
2853 * and therefore multiwrite_cb() would never recognize the multiwrite
2854 * request as completed. We also cannot use the loop variable i to set it
2855 * when the first request fails because the callback may already have been
2856 * called for previously submitted requests. Thus, num_requests must be
2857 * incremented for each request that is submitted.
2858 *
2859 * The problem that callbacks may be called early also means that we need
2860 * to take care that num_requests doesn't become 0 before all requests are
2861 * submitted - multiwrite_cb() would consider the multiwrite request
2862 * completed. A dummy request that is "completed" by a manual call to
2863 * multiwrite_cb() takes care of this.
2864 */
2865 mcb->num_requests = 1;
2866
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002867 // Run the aio requests
Kevin Wolf40b4f532009-09-09 17:53:37 +02002868 for (i = 0; i < num_reqs; i++) {
Kevin Wolf453f9a12010-07-02 14:01:21 +02002869 mcb->num_requests++;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002870 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2871 reqs[i].nb_sectors, multiwrite_cb, mcb);
2872
2873 if (acb == NULL) {
2874 // We can only fail the whole thing if no request has been
2875 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2876 // complete and report the error in the callback.
Kevin Wolf453f9a12010-07-02 14:01:21 +02002877 if (i == 0) {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002878 trace_bdrv_aio_multiwrite_earlyfail(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002879 goto fail;
2880 } else {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002881 trace_bdrv_aio_multiwrite_latefail(mcb, i);
Kevin Wolf7eb58a62010-04-06 18:24:07 +02002882 multiwrite_cb(mcb, -EIO);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002883 break;
2884 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002885 }
2886 }
2887
Kevin Wolf453f9a12010-07-02 14:01:21 +02002888 /* Complete the dummy request */
2889 multiwrite_cb(mcb, 0);
2890
Kevin Wolf40b4f532009-09-09 17:53:37 +02002891 return 0;
2892
2893fail:
Kevin Wolf453f9a12010-07-02 14:01:21 +02002894 for (i = 0; i < mcb->num_callbacks; i++) {
2895 reqs[i].error = -EIO;
2896 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002897 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002898 return -1;
2899}
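/* Illustrative usage sketch (not code from this file; my_write_cb, qiov0,
 * qiov1 and the opaque pointers are made-up names): a device model batching
 * two writes.
 *
 *   BlockRequest reqs[2] = {
 *       { .sector = 0,  .nb_sectors = 8, .qiov = &qiov0,
 *         .cb = my_write_cb, .opaque = req0 },
 *       { .sector = 16, .nb_sectors = 8, .qiov = &qiov1,
 *         .cb = my_write_cb, .opaque = req1 },
 *   };
 *
 *   if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
 *       // reqs[i].error != 0 marks requests that will never get a callback
 *   }
 */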
2900
bellard83f64092006-08-01 16:21:11 +00002901void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00002902{
aliguori6bbff9a2009-03-20 18:25:59 +00002903 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00002904}
2905
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002906/* block I/O throttling */
2907static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2908 bool is_write, double elapsed_time, uint64_t *wait)
2909{
2910 uint64_t bps_limit = 0;
2911 double bytes_limit, bytes_base, bytes_res;
2912 double slice_time, wait_time;
2913
2914 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2915 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2916 } else if (bs->io_limits.bps[is_write]) {
2917 bps_limit = bs->io_limits.bps[is_write];
2918 } else {
2919 if (wait) {
2920 *wait = 0;
2921 }
2922
2923 return false;
2924 }
2925
2926 slice_time = bs->slice_end - bs->slice_start;
2927 slice_time /= (NANOSECONDS_PER_SECOND);
2928 bytes_limit = bps_limit * slice_time;
2929 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2930 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2931 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2932 }
2933
2934    /* bytes_base: the number of bytes already read/written, obtained
2935     * from the accumulated statistics for the current slice.
2936     * bytes_res: the remaining bytes of data which need to be read/written.
2937     * (bytes_base + bytes_res) / bps_limit: used to calculate
2938     * the total time for completing reading/writing all data.
2939     */
2940 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2941
2942 if (bytes_base + bytes_res <= bytes_limit) {
2943 if (wait) {
2944 *wait = 0;
2945 }
2946
2947 return false;
2948 }
2949
2950 /* Calc approx time to dispatch */
2951 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2952
2953    /* When the I/O rate at runtime exceeds the limits,
2954     * bs->slice_end needs to be extended so that the current statistics
2955     * are kept until the timer fires; the extension factor was tuned
2956     * experimentally.
2957     */
2958 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2959 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2960 if (wait) {
2961 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2962 }
2963
2964 return true;
2965}
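/*
 * Worked example (illustrative, not part of the original file): with
 * bps_limit = 1,000,000 bytes/s and slice_time = 0.5 s
 * (5 * BLOCK_IO_SLICE_TIME, assuming a 100 ms BLOCK_IO_SLICE_TIME),
 * bytes_limit is 500,000 bytes.  If 450,000 bytes were already accounted
 * in this slice (bytes_base) and the new request adds 65,536 bytes
 * (bytes_res), the budget is exceeded and
 * wait_time = (450,000 + 65,536) / 1,000,000 - elapsed_time, i.e. about
 * 0.52 s minus the time already spent in the slice.
 */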
2966
2967static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2968 double elapsed_time, uint64_t *wait)
2969{
2970 uint64_t iops_limit = 0;
2971 double ios_limit, ios_base;
2972 double slice_time, wait_time;
2973
2974 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2975 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2976 } else if (bs->io_limits.iops[is_write]) {
2977 iops_limit = bs->io_limits.iops[is_write];
2978 } else {
2979 if (wait) {
2980 *wait = 0;
2981 }
2982
2983 return false;
2984 }
2985
2986 slice_time = bs->slice_end - bs->slice_start;
2987 slice_time /= (NANOSECONDS_PER_SECOND);
2988 ios_limit = iops_limit * slice_time;
2989 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2990 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2991 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2992 }
2993
2994 if (ios_base + 1 <= ios_limit) {
2995 if (wait) {
2996 *wait = 0;
2997 }
2998
2999 return false;
3000 }
3001
3002 /* Calc approx time to dispatch */
3003 wait_time = (ios_base + 1) / iops_limit;
3004 if (wait_time > elapsed_time) {
3005 wait_time = wait_time - elapsed_time;
3006 } else {
3007 wait_time = 0;
3008 }
3009
3010 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3011 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3012 if (wait) {
3013 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3014 }
3015
3016 return true;
3017}
3018
3019static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3020 bool is_write, int64_t *wait)
3021{
3022 int64_t now, max_wait;
3023 uint64_t bps_wait = 0, iops_wait = 0;
3024 double elapsed_time;
3025 int bps_ret, iops_ret;
3026
3027 now = qemu_get_clock_ns(vm_clock);
3028 if ((bs->slice_start < now)
3029 && (bs->slice_end > now)) {
3030 bs->slice_end = now + bs->slice_time;
3031 } else {
3032 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3033 bs->slice_start = now;
3034 bs->slice_end = now + bs->slice_time;
3035
3036 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3037 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3038
3039 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3040 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3041 }
3042
3043 elapsed_time = now - bs->slice_start;
3044 elapsed_time /= (NANOSECONDS_PER_SECOND);
3045
3046 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3047 is_write, elapsed_time, &bps_wait);
3048 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3049 elapsed_time, &iops_wait);
3050 if (bps_ret || iops_ret) {
3051 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3052 if (wait) {
3053 *wait = max_wait;
3054 }
3055
3056 now = qemu_get_clock_ns(vm_clock);
3057 if (bs->slice_end < now + max_wait) {
3058 bs->slice_end = now + max_wait;
3059 }
3060
3061 return true;
3062 }
3063
3064 if (wait) {
3065 *wait = 0;
3066 }
3067
3068 return false;
3069}
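/*
 * Illustrative sketch (not part of the original file): how a request path
 * could consult bdrv_exceed_io_limits() before issuing I/O.  The helper
 * name is invented; the real callers queue throttled requests and retry
 * when a timer fires instead of simply reporting a boolean.
 */
#if 0
static bool example_may_submit(BlockDriverState *bs,
                               int nb_sectors, bool is_write)
{
    int64_t wait_ns = 0;

    if (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_ns)) {
        /* Over the configured bps/iops budget for this slice; the caller
         * should delay the request for roughly wait_ns before retrying. */
        return false;
    }

    /* Within budget: the request may be issued immediately. */
    return true;
}
#endif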
pbrookce1a14d2006-08-07 02:38:06 +00003070
bellard83f64092006-08-01 16:21:11 +00003071/**************************************************************/
3072/* async block device emulation */
3073
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003074typedef struct BlockDriverAIOCBSync {
3075 BlockDriverAIOCB common;
3076 QEMUBH *bh;
3077 int ret;
3078 /* vector translation state */
3079 QEMUIOVector *qiov;
3080 uint8_t *bounce;
3081 int is_write;
3082} BlockDriverAIOCBSync;
3083
3084static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3085{
Kevin Wolfb666d232010-05-05 11:44:39 +02003086 BlockDriverAIOCBSync *acb =
3087 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003088 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003089 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003090 qemu_aio_release(acb);
3091}
3092
3093static AIOPool bdrv_em_aio_pool = {
3094 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3095 .cancel = bdrv_aio_cancel_em,
3096};
3097
bellard83f64092006-08-01 16:21:11 +00003098static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003099{
pbrookce1a14d2006-08-07 02:38:06 +00003100 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003101
aliguorif141eaf2009-04-07 18:43:24 +00003102 if (!acb->is_write)
3103 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003104 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003105 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003106 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003107 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003108 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003109}
bellardbeac80c2006-06-26 20:08:57 +00003110
aliguorif141eaf2009-04-07 18:43:24 +00003111static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3112 int64_t sector_num,
3113 QEMUIOVector *qiov,
3114 int nb_sectors,
3115 BlockDriverCompletionFunc *cb,
3116 void *opaque,
3117 int is_write)
3118
bellardea2384d2004-08-01 21:59:26 +00003119{
pbrookce1a14d2006-08-07 02:38:06 +00003120 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003121
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003122 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003123 acb->is_write = is_write;
3124 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003125 acb->bounce = qemu_blockalign(bs, qiov->size);
aliguorif141eaf2009-04-07 18:43:24 +00003126
pbrookce1a14d2006-08-07 02:38:06 +00003127 if (!acb->bh)
3128 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003129
3130 if (is_write) {
3131 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003132 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003133 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003134 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003135 }
3136
pbrookce1a14d2006-08-07 02:38:06 +00003137 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003138
pbrookce1a14d2006-08-07 02:38:06 +00003139 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003140}
3141
aliguorif141eaf2009-04-07 18:43:24 +00003142static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3143 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003144 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003145{
aliguorif141eaf2009-04-07 18:43:24 +00003146 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003147}
3148
aliguorif141eaf2009-04-07 18:43:24 +00003149static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3150 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3151 BlockDriverCompletionFunc *cb, void *opaque)
3152{
3153 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3154}
3155
Kevin Wolf68485422011-06-30 10:05:46 +02003156
3157typedef struct BlockDriverAIOCBCoroutine {
3158 BlockDriverAIOCB common;
3159 BlockRequest req;
3160 bool is_write;
3161 QEMUBH* bh;
3162} BlockDriverAIOCBCoroutine;
3163
3164static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3165{
3166 qemu_aio_flush();
3167}
3168
3169static AIOPool bdrv_em_co_aio_pool = {
3170 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3171 .cancel = bdrv_aio_co_cancel_em,
3172};
3173
Paolo Bonzini35246a62011-10-14 10:41:29 +02003174static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003175{
3176 BlockDriverAIOCBCoroutine *acb = opaque;
3177
3178 acb->common.cb(acb->common.opaque, acb->req.error);
3179 qemu_bh_delete(acb->bh);
3180 qemu_aio_release(acb);
3181}
3182
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003183/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3184static void coroutine_fn bdrv_co_do_rw(void *opaque)
3185{
3186 BlockDriverAIOCBCoroutine *acb = opaque;
3187 BlockDriverState *bs = acb->common.bs;
3188
3189 if (!acb->is_write) {
3190 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3191 acb->req.nb_sectors, acb->req.qiov);
3192 } else {
3193 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3194 acb->req.nb_sectors, acb->req.qiov);
3195 }
3196
Paolo Bonzini35246a62011-10-14 10:41:29 +02003197 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003198 qemu_bh_schedule(acb->bh);
3199}
3200
Kevin Wolf68485422011-06-30 10:05:46 +02003201static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3202 int64_t sector_num,
3203 QEMUIOVector *qiov,
3204 int nb_sectors,
3205 BlockDriverCompletionFunc *cb,
3206 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003207 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003208{
3209 Coroutine *co;
3210 BlockDriverAIOCBCoroutine *acb;
3211
3212 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3213 acb->req.sector = sector_num;
3214 acb->req.nb_sectors = nb_sectors;
3215 acb->req.qiov = qiov;
3216 acb->is_write = is_write;
3217
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003218 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003219 qemu_coroutine_enter(co, acb);
3220
3221 return &acb->common;
3222}
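/*
 * Illustrative sketch (not part of the original file): submitting an
 * asynchronous read through the public bdrv_aio_readv() interface, which
 * for coroutine-based drivers ends up in bdrv_co_aio_rw_vector() above.
 * The context structure and helper names are invented for the example.
 */
#if 0
typedef struct ExampleReadCtx {
    QEMUIOVector qiov;
    void *buf;
} ExampleReadCtx;

static void example_read_cb(void *opaque, int ret)
{
    ExampleReadCtx *ctx = opaque;

    /* ret is 0 on success or a negative errno; ctx->buf holds the data. */
    qemu_iovec_destroy(&ctx->qiov);
    qemu_vfree(ctx->buf);
    g_free(ctx);
}

static void example_start_read(BlockDriverState *bs)
{
    ExampleReadCtx *ctx = g_malloc0(sizeof(*ctx));

    ctx->buf = qemu_blockalign(bs, 4096);
    qemu_iovec_init(&ctx->qiov, 1);
    qemu_iovec_add(&ctx->qiov, ctx->buf, 4096);

    /* Read 8 sectors (4096 bytes) starting at sector 0. */
    if (!bdrv_aio_readv(bs, 0, &ctx->qiov, 8, example_read_cb, ctx)) {
        example_read_cb(ctx, -EIO);    /* submission failed */
    }
}
#endif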
3223
Paolo Bonzini07f07612011-10-17 12:32:12 +02003224static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003225{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003226 BlockDriverAIOCBCoroutine *acb = opaque;
3227 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003228
Paolo Bonzini07f07612011-10-17 12:32:12 +02003229 acb->req.error = bdrv_co_flush(bs);
3230 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003231 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003232}
3233
Paolo Bonzini07f07612011-10-17 12:32:12 +02003234BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003235 BlockDriverCompletionFunc *cb, void *opaque)
3236{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003237 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003238
Paolo Bonzini07f07612011-10-17 12:32:12 +02003239 Coroutine *co;
3240 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003241
Paolo Bonzini07f07612011-10-17 12:32:12 +02003242 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3243 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3244 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003245
Alexander Graf016f5cf2010-05-26 17:51:49 +02003246 return &acb->common;
3247}
3248
Paolo Bonzini4265d622011-10-17 12:32:14 +02003249static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3250{
3251 BlockDriverAIOCBCoroutine *acb = opaque;
3252 BlockDriverState *bs = acb->common.bs;
3253
3254 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3255 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3256 qemu_bh_schedule(acb->bh);
3257}
3258
3259BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3260 int64_t sector_num, int nb_sectors,
3261 BlockDriverCompletionFunc *cb, void *opaque)
3262{
3263 Coroutine *co;
3264 BlockDriverAIOCBCoroutine *acb;
3265
3266 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3267
3268 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3269 acb->req.sector = sector_num;
3270 acb->req.nb_sectors = nb_sectors;
3271 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3272 qemu_coroutine_enter(co, acb);
3273
3274 return &acb->common;
3275}
3276
bellardea2384d2004-08-01 21:59:26 +00003277void bdrv_init(void)
3278{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003279 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003280}
pbrookce1a14d2006-08-07 02:38:06 +00003281
Markus Armbrustereb852012009-10-27 18:41:44 +01003282void bdrv_init_with_whitelist(void)
3283{
3284 use_bdrv_whitelist = 1;
3285 bdrv_init();
3286}
3287
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003288void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3289 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003290{
pbrookce1a14d2006-08-07 02:38:06 +00003291 BlockDriverAIOCB *acb;
3292
aliguori6bbff9a2009-03-20 18:25:59 +00003293 if (pool->free_aiocb) {
3294 acb = pool->free_aiocb;
3295 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003296 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003297 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003298 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003299 }
3300 acb->bs = bs;
3301 acb->cb = cb;
3302 acb->opaque = opaque;
3303 return acb;
3304}
3305
3306void qemu_aio_release(void *p)
3307{
aliguori6bbff9a2009-03-20 18:25:59 +00003308 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3309 AIOPool *pool = acb->pool;
3310 acb->next = pool->free_aiocb;
3311 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003312}
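/*
 * Illustrative sketch (not part of the original file): the pattern a block
 * driver follows to define its own AIOCB type and pool for qemu_aio_get()
 * and qemu_aio_release().  The names are invented; the layout mirrors
 * BlockDriverAIOCBSync and bdrv_em_aio_pool above.
 */
#if 0
typedef struct ExampleAIOCB {
    BlockDriverAIOCB common;    /* must be the first member */
    int ret;
} ExampleAIOCB;

static void example_aio_cancel(BlockDriverAIOCB *blockacb)
{
    ExampleAIOCB *acb = container_of(blockacb, ExampleAIOCB, common);

    /* Driver-specific cancellation would go here, then release the AIOCB. */
    qemu_aio_release(acb);
}

static AIOPool example_aio_pool = {
    .aiocb_size = sizeof(ExampleAIOCB),
    .cancel     = example_aio_cancel,
};
#endif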
bellard19cb3732006-08-19 11:45:59 +00003313
3314/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003315/* Coroutine block device emulation */
3316
3317typedef struct CoroutineIOCompletion {
3318 Coroutine *coroutine;
3319 int ret;
3320} CoroutineIOCompletion;
3321
3322static void bdrv_co_io_em_complete(void *opaque, int ret)
3323{
3324 CoroutineIOCompletion *co = opaque;
3325
3326 co->ret = ret;
3327 qemu_coroutine_enter(co->coroutine, NULL);
3328}
3329
3330static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3331 int nb_sectors, QEMUIOVector *iov,
3332 bool is_write)
3333{
3334 CoroutineIOCompletion co = {
3335 .coroutine = qemu_coroutine_self(),
3336 };
3337 BlockDriverAIOCB *acb;
3338
3339 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003340 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3341 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003342 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003343 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3344 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003345 }
3346
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003347 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003348 if (!acb) {
3349 return -EIO;
3350 }
3351 qemu_coroutine_yield();
3352
3353 return co.ret;
3354}
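/*
 * Illustrative sketch (not part of the original file): the same
 * CoroutineIOCompletion pattern can bridge any callback-based AIO entry
 * point into coroutine context.  example_co_ioctl() is an invented helper
 * built on bdrv_aio_ioctl(), which is defined later in this file.
 */
#if 0
static int coroutine_fn example_co_ioctl(BlockDriverState *bs,
                                         unsigned long int req, void *buf)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    acb = bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
    if (!acb) {
        return -ENOTSUP;
    }
    qemu_coroutine_yield();    /* re-entered by bdrv_co_io_em_complete() */
    return co.ret;
}
#endif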
3355
3356static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3357 int64_t sector_num, int nb_sectors,
3358 QEMUIOVector *iov)
3359{
3360 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3361}
3362
3363static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3364 int64_t sector_num, int nb_sectors,
3365 QEMUIOVector *iov)
3366{
3367 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3368}
3369
Paolo Bonzini07f07612011-10-17 12:32:12 +02003370static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003371{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003372 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003373
Paolo Bonzini07f07612011-10-17 12:32:12 +02003374 rwco->ret = bdrv_co_flush(rwco->bs);
3375}
3376
3377int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3378{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003379 int ret;
3380
Kevin Wolfca716362011-11-10 18:13:59 +01003381 if (!bs->drv) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003382 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003383 }
3384
Kevin Wolfca716362011-11-10 18:13:59 +01003385 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003386 if (bs->drv->bdrv_co_flush_to_os) {
3387 ret = bs->drv->bdrv_co_flush_to_os(bs);
3388 if (ret < 0) {
3389 return ret;
3390 }
3391 }
3392
Kevin Wolfca716362011-11-10 18:13:59 +01003393 /* But don't actually force it to the disk with cache=unsafe */
3394 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3395 return 0;
3396 }
3397
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003398 if (bs->drv->bdrv_co_flush_to_disk) {
Kevin Wolfc68b89a2011-11-10 17:25:44 +01003399 return bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003400 } else if (bs->drv->bdrv_aio_flush) {
3401 BlockDriverAIOCB *acb;
3402 CoroutineIOCompletion co = {
3403 .coroutine = qemu_coroutine_self(),
3404 };
3405
3406 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3407 if (acb == NULL) {
3408 return -EIO;
3409 } else {
3410 qemu_coroutine_yield();
3411 return co.ret;
3412 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003413 } else {
3414 /*
3415         * mode and therefore don't support bdrv_flush. Usually qemu doesn't
3416 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3417 * know how the server works (because the behaviour is hardcoded or
3418 * depends on server-side configuration), so we can't ensure that
3419 * everything is safe on disk. Returning an error doesn't work because
3420 * that would break guests even if the server operates in writethrough
3421 * mode.
3422 *
3423 * Let's hope the user knows what he's doing.
3424 */
3425 return 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003426 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003427}
3428
Anthony Liguori0f154232011-11-14 15:09:45 -06003429void bdrv_invalidate_cache(BlockDriverState *bs)
3430{
3431 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3432 bs->drv->bdrv_invalidate_cache(bs);
3433 }
3434}
3435
3436void bdrv_invalidate_cache_all(void)
3437{
3438 BlockDriverState *bs;
3439
3440 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3441 bdrv_invalidate_cache(bs);
3442 }
3443}
3444
Paolo Bonzini07f07612011-10-17 12:32:12 +02003445int bdrv_flush(BlockDriverState *bs)
3446{
3447 Coroutine *co;
3448 RwCo rwco = {
3449 .bs = bs,
3450 .ret = NOT_DONE,
3451 };
3452
3453 if (qemu_in_coroutine()) {
3454 /* Fast-path if already in coroutine context */
3455 bdrv_flush_co_entry(&rwco);
3456 } else {
3457 co = qemu_coroutine_create(bdrv_flush_co_entry);
3458 qemu_coroutine_enter(co, &rwco);
3459 while (rwco.ret == NOT_DONE) {
3460 qemu_aio_wait();
3461 }
3462 }
3463
3464 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003465}
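/*
 * Illustrative sketch (not part of the original file): a caller that needs
 * its data to be durable issues bdrv_flush() after its writes; outside of
 * coroutine context this blocks in qemu_aio_wait() via the RwCo path above.
 * example_commit_metadata() is an invented helper.
 */
#if 0
static int example_commit_metadata(BlockDriverState *bs,
                                   const uint8_t *buf, int nb_sectors)
{
    int ret = bdrv_write(bs, 0, buf, nb_sectors);
    if (ret < 0) {
        return ret;
    }
    /* Make sure the data has reached the disk, not just the host cache. */
    return bdrv_flush(bs);
}
#endif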
3466
Paolo Bonzini4265d622011-10-17 12:32:14 +02003467static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3468{
3469 RwCo *rwco = opaque;
3470
3471 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3472}
3473
3474int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3475 int nb_sectors)
3476{
3477 if (!bs->drv) {
3478 return -ENOMEDIUM;
3479 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3480 return -EIO;
3481 } else if (bs->read_only) {
3482 return -EROFS;
3483 } else if (bs->drv->bdrv_co_discard) {
3484 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3485 } else if (bs->drv->bdrv_aio_discard) {
3486 BlockDriverAIOCB *acb;
3487 CoroutineIOCompletion co = {
3488 .coroutine = qemu_coroutine_self(),
3489 };
3490
3491 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3492 bdrv_co_io_em_complete, &co);
3493 if (acb == NULL) {
3494 return -EIO;
3495 } else {
3496 qemu_coroutine_yield();
3497 return co.ret;
3498 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003499 } else {
3500 return 0;
3501 }
3502}
3503
3504int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3505{
3506 Coroutine *co;
3507 RwCo rwco = {
3508 .bs = bs,
3509 .sector_num = sector_num,
3510 .nb_sectors = nb_sectors,
3511 .ret = NOT_DONE,
3512 };
3513
3514 if (qemu_in_coroutine()) {
3515 /* Fast-path if already in coroutine context */
3516 bdrv_discard_co_entry(&rwco);
3517 } else {
3518 co = qemu_coroutine_create(bdrv_discard_co_entry);
3519 qemu_coroutine_enter(co, &rwco);
3520 while (rwco.ret == NOT_DONE) {
3521 qemu_aio_wait();
3522 }
3523 }
3524
3525 return rwco.ret;
3526}
3527
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003528/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003529/* removable device support */
3530
3531/**
3532 * Return TRUE if the media is present
3533 */
3534int bdrv_is_inserted(BlockDriverState *bs)
3535{
3536 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003537
bellard19cb3732006-08-19 11:45:59 +00003538 if (!drv)
3539 return 0;
3540 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003541 return 1;
3542 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003543}
3544
3545/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003546 * Return whether the media changed since the last call to this
3547 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003548 */
3549int bdrv_media_changed(BlockDriverState *bs)
3550{
3551 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003552
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003553 if (drv && drv->bdrv_media_changed) {
3554 return drv->bdrv_media_changed(bs);
3555 }
3556 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003557}
3558
3559/**
3560 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3561 */
Markus Armbrusterfdec4402011-09-06 18:58:45 +02003562void bdrv_eject(BlockDriverState *bs, int eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003563{
3564 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003565
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003566 if (drv && drv->bdrv_eject) {
3567 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003568 }
bellard19cb3732006-08-19 11:45:59 +00003569}
3570
bellard19cb3732006-08-19 11:45:59 +00003571/**
3572 * Lock or unlock the media (if it is locked, the user won't be able
3573 * to eject it manually).
3574 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003575void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003576{
3577 BlockDriver *drv = bs->drv;
3578
Markus Armbruster025e8492011-09-06 18:58:47 +02003579 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003580
Markus Armbruster025e8492011-09-06 18:58:47 +02003581 if (drv && drv->bdrv_lock_medium) {
3582 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003583 }
3584}
ths985a03b2007-12-24 16:10:43 +00003585
3586/* needed for generic scsi interface */
3587
3588int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3589{
3590 BlockDriver *drv = bs->drv;
3591
3592 if (drv && drv->bdrv_ioctl)
3593 return drv->bdrv_ioctl(bs, req, buf);
3594 return -ENOTSUP;
3595}
aliguori7d780662009-03-12 19:57:08 +00003596
aliguori221f7152009-03-28 17:28:41 +00003597BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3598 unsigned long int req, void *buf,
3599 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003600{
aliguori221f7152009-03-28 17:28:41 +00003601 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003602
aliguori221f7152009-03-28 17:28:41 +00003603 if (drv && drv->bdrv_aio_ioctl)
3604 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3605 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003606}
aliguorie268ca52009-04-22 20:20:00 +00003607
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003608void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3609{
3610 bs->buffer_alignment = align;
3611}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003612
aliguorie268ca52009-04-22 20:20:00 +00003613void *qemu_blockalign(BlockDriverState *bs, size_t size)
3614{
3615 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3616}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003617
3618void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3619{
3620 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003621
Liran Schouraaa0eb72010-01-26 10:31:48 +02003622 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003623 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003624 if (!bs->dirty_bitmap) {
3625 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3626 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3627 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003628
Anthony Liguori7267c092011-08-20 22:09:37 -05003629 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003630 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003631 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003632 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003633 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003634 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003635 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003636 }
3637}
3638
3639int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3640{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003641 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003642
Jan Kiszkac6d22832009-11-30 18:21:20 +01003643 if (bs->dirty_bitmap &&
3644 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003645 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3646 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003647 } else {
3648 return 0;
3649 }
3650}
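/*
 * Illustrative sketch (not part of the original file): scanning for dirty
 * sectors the way a migration client might, using bdrv_get_dirty() and the
 * chunk granularity of the bitmap above.  example_find_next_dirty() is an
 * invented helper and assumes 'sector' is chunk-aligned.
 */
#if 0
static int64_t example_find_next_dirty(BlockDriverState *bs,
                                       int64_t sector, int64_t total_sectors)
{
    while (sector < total_sectors) {
        if (bdrv_get_dirty(bs, sector)) {
            return sector;                      /* start of a dirty chunk */
        }
        /* The bitmap tracks whole chunks, so advance one chunk at a time. */
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
    }
    return -1;                                  /* nothing dirty */
}
#endif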
3651
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003652void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3653 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003654{
3655 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3656}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003657
3658int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3659{
3660 return bs->dirty_count;
3661}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003662
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003663void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3664{
3665 assert(bs->in_use != in_use);
3666 bs->in_use = in_use;
3667}
3668
3669int bdrv_in_use(BlockDriverState *bs)
3670{
3671 return bs->in_use;
3672}
3673
Luiz Capitulino28a72822011-09-26 17:43:50 -03003674void bdrv_iostatus_enable(BlockDriverState *bs)
3675{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003676 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003677 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003678}
3679
3680/* The I/O status is only enabled if the drive explicitly
3681 * enables it _and_ the VM is configured to stop on errors */
3682bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3683{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003684 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003685 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3686 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3687 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3688}
3689
3690void bdrv_iostatus_disable(BlockDriverState *bs)
3691{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003692 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003693}
3694
3695void bdrv_iostatus_reset(BlockDriverState *bs)
3696{
3697 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003698 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003699 }
3700}
3701
3702/* XXX: Today this is set by device models because it makes the implementation
3703 quite simple. However, the block layer knows about the error, so it's
3704 possible to implement this without device models being involved */
3705void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3706{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003707 if (bdrv_iostatus_is_enabled(bs) &&
3708 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003709 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003710 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3711 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003712 }
3713}
3714
Christoph Hellwiga597e792011-08-25 08:26:01 +02003715void
3716bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3717 enum BlockAcctType type)
3718{
3719 assert(type < BDRV_MAX_IOTYPE);
3720
3721 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003722 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003723 cookie->type = type;
3724}
3725
3726void
3727bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3728{
3729 assert(cookie->type < BDRV_MAX_IOTYPE);
3730
3731 bs->nr_bytes[cookie->type] += cookie->bytes;
3732 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003733 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003734}
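/*
 * Illustrative sketch (not part of the original file): how a device model
 * brackets a guest read with the accounting hooks above.  The request type,
 * callback and the use of BDRV_ACCT_READ as the accounting type are
 * assumptions for the example.
 */
#if 0
typedef struct ExampleRequest {
    BlockDriverState *bs;
    BlockAcctCookie acct;
    QEMUIOVector qiov;
    int64_t sector;
    int nb_sectors;
} ExampleRequest;

static void example_device_read_done(void *opaque, int ret)
{
    ExampleRequest *req = opaque;

    bdrv_acct_done(req->bs, &req->acct);    /* accumulate bytes/ops/latency */
    /* ... complete the guest request, checking ret ... */
}

static void example_device_read(ExampleRequest *req)
{
    bdrv_acct_start(req->bs, &req->acct,
                    req->nb_sectors * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
    bdrv_aio_readv(req->bs, req->sector, &req->qiov, req->nb_sectors,
                   example_device_read_done, req);
}
#endif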
3735
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003736int bdrv_img_create(const char *filename, const char *fmt,
3737 const char *base_filename, const char *base_fmt,
3738 char *options, uint64_t img_size, int flags)
3739{
3740 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003741 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003742 BlockDriverState *bs = NULL;
3743 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003744 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003745 int ret = 0;
3746
3747 /* Find driver and parse its options */
3748 drv = bdrv_find_format(fmt);
3749 if (!drv) {
3750 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003751 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003752 goto out;
3753 }
3754
3755 proto_drv = bdrv_find_protocol(filename);
3756 if (!proto_drv) {
3757 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003758 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003759 goto out;
3760 }
3761
3762 create_options = append_option_parameters(create_options,
3763 drv->create_options);
3764 create_options = append_option_parameters(create_options,
3765 proto_drv->create_options);
3766
3767 /* Create parameter list with default values */
3768 param = parse_option_parameters("", create_options, param);
3769
3770 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3771
3772 /* Parse -o options */
3773 if (options) {
3774 param = parse_option_parameters(options, create_options, param);
3775 if (param == NULL) {
3776 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003777 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003778 goto out;
3779 }
3780 }
3781
3782 if (base_filename) {
3783 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3784 base_filename)) {
3785 error_report("Backing file not supported for file format '%s'",
3786 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003787 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003788 goto out;
3789 }
3790 }
3791
3792 if (base_fmt) {
3793 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3794 error_report("Backing file format not supported for file "
3795 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003796 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003797 goto out;
3798 }
3799 }
3800
Jes Sorensen792da932010-12-16 13:52:17 +01003801 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3802 if (backing_file && backing_file->value.s) {
3803 if (!strcmp(filename, backing_file->value.s)) {
3804 error_report("Error: Trying to create an image with the "
3805 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003806 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003807 goto out;
3808 }
3809 }
3810
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003811 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3812 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003813 backing_drv = bdrv_find_format(backing_fmt->value.s);
3814 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003815 error_report("Unknown backing file format '%s'",
3816 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003817 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003818 goto out;
3819 }
3820 }
3821
3822 // The size for the image must always be specified, with one exception:
3823 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02003824 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3825 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003826 if (backing_file && backing_file->value.s) {
3827 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003828 char buf[32];
3829
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003830 bs = bdrv_new("");
3831
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003832 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003833 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003834 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003835 goto out;
3836 }
3837 bdrv_get_geometry(bs, &size);
3838 size *= 512;
3839
3840 snprintf(buf, sizeof(buf), "%" PRId64, size);
3841 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3842 } else {
3843 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003844 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003845 goto out;
3846 }
3847 }
3848
3849 printf("Formatting '%s', fmt=%s ", filename, fmt);
3850 print_option_parameters(param);
3851 puts("");
3852
3853 ret = bdrv_create(drv, filename, param);
3854
3855 if (ret < 0) {
3856 if (ret == -ENOTSUP) {
3857 error_report("Formatting or formatting option not supported for "
3858 "file format '%s'", fmt);
3859 } else if (ret == -EFBIG) {
3860 error_report("The image size is too large for file format '%s'",
3861 fmt);
3862 } else {
3863 error_report("%s: error while creating %s: %s", filename, fmt,
3864 strerror(-ret));
3865 }
3866 }
3867
3868out:
3869 free_option_parameters(create_options);
3870 free_option_parameters(param);
3871
3872 if (bs) {
3873 bdrv_delete(bs);
3874 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01003875
3876 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003877}
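/*
 * Illustrative sketch (not part of the original file): creating a 1 GiB
 * qcow2 image programmatically through bdrv_img_create(), roughly what
 * 'qemu-img create' does.  The file name is made up for the example.
 */
#if 0
static int example_create_image(void)
{
    return bdrv_img_create("/tmp/example.qcow2", "qcow2",
                           NULL, NULL,               /* no backing file/fmt */
                           NULL,                     /* no -o options */
                           1024 * 1024 * 1024,       /* 1 GiB */
                           0);                       /* open flags */
}
#endif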