blob: 50ce2be48ed0ace82cf986f2440118d5b967dd70 [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010049#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020051static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000052static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
53 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000054 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000055static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
56 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000057 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020058static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
59 int64_t sector_num, int nb_sectors,
60 QEMUIOVector *iov);
61static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
62 int64_t sector_num, int nb_sectors,
63 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010064static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010066static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010068static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
69 int64_t sector_num,
70 QEMUIOVector *qiov,
71 int nb_sectors,
72 BlockDriverCompletionFunc *cb,
73 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010074 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000076
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080077static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
78 bool is_write, double elapsed_time, uint64_t *wait);
79static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
80 double elapsed_time, uint64_t *wait);
81static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
82 bool is_write, int64_t *wait);
83
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010084static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
85 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000086
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010087static QLIST_HEAD(, BlockDriver) bdrv_drivers =
88 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000089
Markus Armbrusterf9092b12010-06-25 10:33:39 +020090/* The device to use for VM snapshots */
91static BlockDriverState *bs_snapshots;
92
Markus Armbrustereb852012009-10-27 18:41:44 +010093/* If non-zero, use only whitelisted block drivers */
94static int use_bdrv_whitelist;
95
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +000096#ifdef _WIN32
/*
 * Return non-zero if filename starts with an ASCII letter immediately
 * followed by ':' (a drive specification such as "c:"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    /* only look at the second character once the first one qualified */
    if (!is_letter) {
        return 0;
    }
    return filename[1] == ':';
}
103
/*
 * Return 1 if filename names a whole Windows drive: either a bare drive
 * specification ("d:") or a name starting with "\\.\" or "//./".
 * Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* exactly "<letter>:" and nothing after it */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
114#endif
115
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800116/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800117void bdrv_io_limits_disable(BlockDriverState *bs)
118{
119 bs->io_limits_enabled = false;
120
121 while (qemu_co_queue_next(&bs->throttled_reqs));
122
123 if (bs->block_timer) {
124 qemu_del_timer(bs->block_timer);
125 qemu_free_timer(bs->block_timer);
126 bs->block_timer = NULL;
127 }
128
129 bs->slice_start = 0;
130 bs->slice_end = 0;
131 bs->slice_time = 0;
132 memset(&bs->io_base, 0, sizeof(bs->io_base));
133}
134
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800135static void bdrv_block_timer(void *opaque)
136{
137 BlockDriverState *bs = opaque;
138
139 qemu_co_queue_next(&bs->throttled_reqs);
140}
141
142void bdrv_io_limits_enable(BlockDriverState *bs)
143{
144 qemu_co_queue_init(&bs->throttled_reqs);
145 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
146 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
147 bs->slice_start = qemu_get_clock_ns(vm_clock);
148 bs->slice_end = bs->slice_start + bs->slice_time;
149 memset(&bs->io_base, 0, sizeof(bs->io_base));
150 bs->io_limits_enabled = true;
151}
152
153bool bdrv_io_limits_enabled(BlockDriverState *bs)
154{
155 BlockIOLimit *io_limits = &bs->io_limits;
156 return io_limits->bps[BLOCK_IO_LIMIT_READ]
157 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
158 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
159 || io_limits->iops[BLOCK_IO_LIMIT_READ]
160 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
161 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
162}
163
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800164static void bdrv_io_limits_intercept(BlockDriverState *bs,
165 bool is_write, int nb_sectors)
166{
167 int64_t wait_time = -1;
168
169 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
170 qemu_co_queue_wait(&bs->throttled_reqs);
171 }
172
173 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
174 * throttled requests will not be dequeued until the current request is
175 * allowed to be serviced. So if the current request still exceeds the
176 * limits, it will be inserted to the head. All requests followed it will
177 * be still in throttled_reqs queue.
178 */
179
180 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
181 qemu_mod_timer(bs->block_timer,
182 wait_time + qemu_get_clock_ns(vm_clock));
183 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
184 }
185
186 qemu_co_queue_next(&bs->throttled_reqs);
187}
188
/*
 * Check if the path starts with "<protocol>:".
 *
 * Only a ':' that appears before the first directory separator counts as a
 * protocol delimiter.  A colon further down the path (as in
 * "/dev/disk/by-path/pci-0000:00:1f.2" or "./dir/a:b") is part of a file
 * name and must not be taken for a protocol prefix.
 *
 * On Windows, drive specifications ("c:", "c:\dir", "\\.\d:") are never
 * treated as protocols, and '\' is a directory separator as well.
 *
 * Returns 1 if path has a protocol prefix, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    /* p is at the first ':' or separator, or at the terminating NUL */
    return *p == ':';
}
201
/*
 * Return non-zero if path is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped; the
 * path is absolute when what follows starts with '/' (or '\' on Windows).
 * On Windows a path that itself starts with '/' or '\' is absolute too.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
221
/*
 * Build in dest (of dest_size bytes) the location of filename.
 *
 * If filename is absolute it is simply copied.  Otherwise it is taken to be
 * relative to base_path: the directory part of base_path (or, if longer,
 * its "<protocol>:" part) is kept and filename is appended to it.  URLs are
 * supported.  Nothing is written when dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *keep_end;   /* one past the part of base_path to keep */
    const char *dir_end;    /* one past the last directory separator */
    int keep_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* candidate 1: just after the first ':' of base_path */
    keep_end = strchr(base_path, ':');
    keep_end = keep_end ? keep_end + 1 : base_path;

    /* candidate 2: just after the last separator of base_path */
    dir_end = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!dir_end || backslash > dir_end) {
            dir_end = backslash;
        }
    }
#endif
    dir_end = dir_end ? dir_end + 1 : base_path;

    /* keep the longer of the two prefixes */
    if (dir_end > keep_end) {
        keep_end = dir_end;
    }

    keep_len = keep_end - base_path;
    if (keep_len > dest_size - 1) {
        keep_len = dest_size - 1;
    }
    memcpy(dest, base_path, keep_len);
    dest[keep_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
265
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500266void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000267{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100268 /* Block drivers without coroutine functions need emulation */
269 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200270 bdrv->bdrv_co_readv = bdrv_co_readv_em;
271 bdrv->bdrv_co_writev = bdrv_co_writev_em;
272
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100273 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
274 * the block driver lacks aio we need to emulate that too.
275 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200276 if (!bdrv->bdrv_aio_readv) {
277 /* add AIO emulation layer */
278 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
279 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200280 }
bellard83f64092006-08-01 16:21:11 +0000281 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200282
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100283 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000284}
bellardb3380822004-03-14 21:38:54 +0000285
286/* create a new block device (by default it is empty) */
287BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000288{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100289 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000290
Anthony Liguori7267c092011-08-20 22:09:37 -0500291 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000292 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000293 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100294 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000295 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300296 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000297 return bs;
298}
299
bellardea2384d2004-08-01 21:59:26 +0000300BlockDriver *bdrv_find_format(const char *format_name)
301{
302 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100303 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
304 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000305 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100306 }
bellardea2384d2004-08-01 21:59:26 +0000307 }
308 return NULL;
309}
310
Markus Armbrustereb852012009-10-27 18:41:44 +0100311static int bdrv_is_whitelisted(BlockDriver *drv)
312{
313 static const char *whitelist[] = {
314 CONFIG_BDRV_WHITELIST
315 };
316 const char **p;
317
318 if (!whitelist[0])
319 return 1; /* no whitelist, anything goes */
320
321 for (p = whitelist; *p; p++) {
322 if (!strcmp(drv->format_name, *p)) {
323 return 1;
324 }
325 }
326 return 0;
327}
328
329BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
330{
331 BlockDriver *drv = bdrv_find_format(format_name);
332 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
333}
334
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200335int bdrv_create(BlockDriver *drv, const char* filename,
336 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000337{
338 if (!drv->bdrv_create)
339 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200340
341 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000342}
343
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200344int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
345{
346 BlockDriver *drv;
347
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900348 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200349 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000350 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351 }
352
353 return bdrv_create(drv, filename, options);
354}
355
bellardd5249392004-08-03 21:14:23 +0000356#ifdef _WIN32
bellard95389c82005-12-18 18:28:15 +0000357void get_tmp_filename(char *filename, int size)
bellardd5249392004-08-03 21:14:23 +0000358{
bellard3b9f94e2007-01-07 17:27:07 +0000359 char temp_dir[MAX_PATH];
ths3b46e622007-09-17 08:09:54 +0000360
bellard3b9f94e2007-01-07 17:27:07 +0000361 GetTempPath(MAX_PATH, temp_dir);
362 GetTempFileName(temp_dir, "qem", 0, filename);
bellardd5249392004-08-03 21:14:23 +0000363}
364#else
/*
 * Create an empty temporary file in $TMPDIR (or /tmp if TMPDIR is unset)
 * and store its name in filename, a buffer of size bytes.
 *
 * On failure (name does not fit into the buffer, or mkstemp() fails)
 * filename is set to the empty string, so that the caller does not go on
 * to use a file name that was never created.  Nothing is written when size
 * is not positive.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    int len;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        /* a truncated template is not what mkstemp() expects */
        filename[0] = '\0';
        return;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        filename[0] = '\0';
        return;
    }
    close(fd);
}
bellardd5249392004-08-03 21:14:23 +0000377#endif
bellardea2384d2004-08-01 21:59:26 +0000378
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200379/*
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
382 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200383static BlockDriver *find_hdev_driver(const char *filename)
384{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200385 int score_max = 0, score;
386 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200387
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100388 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200389 if (d->bdrv_probe_device) {
390 score = d->bdrv_probe_device(filename);
391 if (score > score_max) {
392 score_max = score;
393 drv = d;
394 }
395 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200396 }
397
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200398 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200399}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200400
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900401BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200402{
403 BlockDriver *drv1;
404 char protocol[128];
405 int len;
406 const char *p;
407
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200408 /* TODO Drivers without bdrv_file_open must be specified explicitly */
409
Christoph Hellwig39508e72010-06-23 12:25:17 +0200410 /*
411 * XXX(hch): we really should not let host device detection
412 * override an explicit protocol specification, but moving this
413 * later breaks access to device names with colons in them.
414 * Thanks to the brain-dead persistent naming schemes on udev-
415 * based Linux systems those actually are quite common.
416 */
417 drv1 = find_hdev_driver(filename);
418 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200419 return drv1;
420 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200421
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000422 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200423 return bdrv_find_format("file");
424 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000425 p = strchr(filename, ':');
426 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200427 len = p - filename;
428 if (len > sizeof(protocol) - 1)
429 len = sizeof(protocol) - 1;
430 memcpy(protocol, filename, len);
431 protocol[len] = '\0';
432 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
433 if (drv1->protocol_name &&
434 !strcmp(drv1->protocol_name, protocol)) {
435 return drv1;
436 }
437 }
438 return NULL;
439}
440
Stefan Weilc98ac352010-07-21 21:51:51 +0200441static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000442{
bellard83f64092006-08-01 16:21:11 +0000443 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000444 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000445 uint8_t buf[2048];
446 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000447
Naphtali Spreif5edb012010-01-17 16:48:13 +0200448 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200449 if (ret < 0) {
450 *pdrv = NULL;
451 return ret;
452 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700453
Kevin Wolf08a00552010-06-01 18:37:31 +0200454 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
455 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700456 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200457 drv = bdrv_find_format("raw");
458 if (!drv) {
459 ret = -ENOENT;
460 }
461 *pdrv = drv;
462 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700464
bellard83f64092006-08-01 16:21:11 +0000465 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
466 bdrv_delete(bs);
467 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200468 *pdrv = NULL;
469 return ret;
bellard83f64092006-08-01 16:21:11 +0000470 }
471
bellardea2384d2004-08-01 21:59:26 +0000472 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200473 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100474 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000475 if (drv1->bdrv_probe) {
476 score = drv1->bdrv_probe(buf, ret, filename);
477 if (score > score_max) {
478 score_max = score;
479 drv = drv1;
480 }
bellardea2384d2004-08-01 21:59:26 +0000481 }
482 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200483 if (!drv) {
484 ret = -ENOENT;
485 }
486 *pdrv = drv;
487 return ret;
bellardea2384d2004-08-01 21:59:26 +0000488}
489
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100490/**
491 * Set the current 'total_sectors' value
492 */
493static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
494{
495 BlockDriver *drv = bs->drv;
496
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700497 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
498 if (bs->sg)
499 return 0;
500
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100501 /* query actual device if possible, otherwise just trust the hint */
502 if (drv->bdrv_getlength) {
503 int64_t length = drv->bdrv_getlength(bs);
504 if (length < 0) {
505 return length;
506 }
507 hint = length >> BDRV_SECTOR_BITS;
508 }
509
510 bs->total_sectors = hint;
511 return 0;
512}
513
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100514/**
515 * Set open flags for a given cache mode
516 *
517 * Return 0 on success, -1 if the cache mode was invalid.
518 */
519int bdrv_parse_cache_flags(const char *mode, int *flags)
520{
521 *flags &= ~BDRV_O_CACHE_MASK;
522
523 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
524 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100525 } else if (!strcmp(mode, "directsync")) {
526 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100527 } else if (!strcmp(mode, "writeback")) {
528 *flags |= BDRV_O_CACHE_WB;
529 } else if (!strcmp(mode, "unsafe")) {
530 *flags |= BDRV_O_CACHE_WB;
531 *flags |= BDRV_O_NO_FLUSH;
532 } else if (!strcmp(mode, "writethrough")) {
533 /* this is the default */
534 } else {
535 return -1;
536 }
537
538 return 0;
539}
540
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000541/**
542 * The copy-on-read flag is actually a reference count so multiple users may
543 * use the feature without worrying about clobbering its previous state.
544 * Copy-on-read stays enabled until all users have called to disable it.
545 */
546void bdrv_enable_copy_on_read(BlockDriverState *bs)
547{
548 bs->copy_on_read++;
549}
550
551void bdrv_disable_copy_on_read(BlockDriverState *bs)
552{
553 assert(bs->copy_on_read > 0);
554 bs->copy_on_read--;
555}
556
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200557/*
Kevin Wolf57915332010-04-14 15:24:50 +0200558 * Common part for opening disk images and files
559 */
560static int bdrv_open_common(BlockDriverState *bs, const char *filename,
561 int flags, BlockDriver *drv)
562{
563 int ret, open_flags;
564
565 assert(drv != NULL);
566
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100567 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
568
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200569 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100570 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200571 bs->encrypted = 0;
572 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100573 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200574 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100575 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200576 bs->buffer_alignment = 512;
577
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000578 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
579 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
580 bdrv_enable_copy_on_read(bs);
581 }
582
Kevin Wolf57915332010-04-14 15:24:50 +0200583 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100584 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200585
586 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
587 return -ENOTSUP;
588 }
589
590 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500591 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200592
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100593 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200594
595 /*
596 * Clear flags that are internal to the block layer before opening the
597 * image.
598 */
599 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
600
601 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200602 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200603 */
604 if (bs->is_temporary) {
605 open_flags |= BDRV_O_RDWR;
606 }
607
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100608 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
609
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200610 /* Open the image, either directly or using a protocol */
611 if (drv->bdrv_file_open) {
612 ret = drv->bdrv_file_open(bs, filename, open_flags);
613 } else {
614 ret = bdrv_file_open(&bs->file, filename, open_flags);
615 if (ret >= 0) {
616 ret = drv->bdrv_open(bs, open_flags);
617 }
618 }
619
Kevin Wolf57915332010-04-14 15:24:50 +0200620 if (ret < 0) {
621 goto free_and_fail;
622 }
623
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100624 ret = refresh_total_sectors(bs, bs->total_sectors);
625 if (ret < 0) {
626 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200627 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100628
Kevin Wolf57915332010-04-14 15:24:50 +0200629#ifndef _WIN32
630 if (bs->is_temporary) {
631 unlink(filename);
632 }
633#endif
634 return 0;
635
636free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200637 if (bs->file) {
638 bdrv_delete(bs->file);
639 bs->file = NULL;
640 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500641 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200642 bs->opaque = NULL;
643 bs->drv = NULL;
644 return ret;
645}
646
647/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200648 * Opens a file using a protocol (file, host_device, nbd, ...)
649 */
bellard83f64092006-08-01 16:21:11 +0000650int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000651{
bellard83f64092006-08-01 16:21:11 +0000652 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200653 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000654 int ret;
655
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900656 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200657 if (!drv) {
658 return -ENOENT;
659 }
660
bellard83f64092006-08-01 16:21:11 +0000661 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200662 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000663 if (ret < 0) {
664 bdrv_delete(bs);
665 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000666 }
aliguori71d07702009-03-03 17:37:16 +0000667 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000668 *pbs = bs;
669 return 0;
bellardea2384d2004-08-01 21:59:26 +0000670}
bellardfc01f7e2003-06-30 10:03:06 +0000671
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200672/*
673 * Opens a disk image (raw, qcow2, vmdk, ...)
674 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200675int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
676 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000677{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200678 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200679 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000680
bellard83f64092006-08-01 16:21:11 +0000681 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000682 BlockDriverState *bs1;
683 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000684 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200685 BlockDriver *bdrv_qcow2;
686 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200687 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000688
bellardea2384d2004-08-01 21:59:26 +0000689 /* if snapshot, we create a temporary backing file and open it
690 instead of opening 'filename' directly */
691
692 /* if there is a backing file, use it */
693 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200694 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000695 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000696 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000697 return ret;
bellardea2384d2004-08-01 21:59:26 +0000698 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200699 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000700
701 if (bs1->drv && bs1->drv->protocol_name)
702 is_protocol = 1;
703
bellardea2384d2004-08-01 21:59:26 +0000704 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000705
bellardea2384d2004-08-01 21:59:26 +0000706 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000707
708 /* Real path is meaningless for protocols */
709 if (is_protocol)
710 snprintf(backing_filename, sizeof(backing_filename),
711 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000712 else if (!realpath(filename, backing_filename))
713 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000714
Kevin Wolf91a073a2009-05-27 14:48:06 +0200715 bdrv_qcow2 = bdrv_find_format("qcow2");
716 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
717
Jes Sorensen3e829902010-05-27 16:20:30 +0200718 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200719 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
720 if (drv) {
721 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
722 drv->format_name);
723 }
724
725 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200726 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000727 if (ret < 0) {
728 return ret;
bellardea2384d2004-08-01 21:59:26 +0000729 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200730
bellardea2384d2004-08-01 21:59:26 +0000731 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200732 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000733 bs->is_temporary = 1;
734 }
bellard712e7872005-04-28 21:09:32 +0000735
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200736 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200737 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200738 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000739 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100740
aliguori51d7c002009-03-05 23:00:29 +0000741 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000742 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000743 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200744
745 /* Open the image */
746 ret = bdrv_open_common(bs, filename, flags, drv);
747 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100748 goto unlink_and_fail;
749 }
750
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751 /* If there is a backing file, use it */
752 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
753 char backing_filename[PATH_MAX];
754 int back_flags;
755 BlockDriver *back_drv = NULL;
756
757 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000758
759 if (path_has_protocol(bs->backing_file)) {
760 pstrcpy(backing_filename, sizeof(backing_filename),
761 bs->backing_file);
762 } else {
763 path_combine(backing_filename, sizeof(backing_filename),
764 filename, bs->backing_file);
765 }
766
767 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200768 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000769 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200770
771 /* backing files always opened read-only */
772 back_flags =
773 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
774
775 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
776 if (ret < 0) {
777 bdrv_close(bs);
778 return ret;
779 }
780 if (bs->is_temporary) {
781 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
782 } else {
783 /* base image inherits from "parent" */
784 bs->backing_hd->keep_read_only = bs->keep_read_only;
785 }
786 }
787
788 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200789 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200790 }
791
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800792 /* throttling disk I/O limits */
793 if (bs->io_limits_enabled) {
794 bdrv_io_limits_enable(bs);
795 }
796
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200797 return 0;
798
799unlink_and_fail:
800 if (bs->is_temporary) {
801 unlink(filename);
802 }
803 return ret;
804}
805
bellardfc01f7e2003-06-30 10:03:06 +0000806void bdrv_close(BlockDriverState *bs)
807{
bellard19cb3732006-08-19 11:45:59 +0000808 if (bs->drv) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200809 if (bs == bs_snapshots) {
810 bs_snapshots = NULL;
811 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100812 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000813 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100814 bs->backing_hd = NULL;
815 }
bellardea2384d2004-08-01 21:59:26 +0000816 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500817 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000818#ifdef _WIN32
819 if (bs->is_temporary) {
820 unlink(bs->filename);
821 }
bellard67b915a2004-03-31 23:37:16 +0000822#endif
bellardea2384d2004-08-01 21:59:26 +0000823 bs->opaque = NULL;
824 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000825 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000826
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200827 if (bs->file != NULL) {
828 bdrv_close(bs->file);
829 }
830
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200831 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000832 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800833
834 /*throttling disk I/O limits*/
835 if (bs->io_limits_enabled) {
836 bdrv_io_limits_disable(bs);
837 }
bellardb3380822004-03-14 21:38:54 +0000838}
839
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900840void bdrv_close_all(void)
841{
842 BlockDriverState *bs;
843
844 QTAILQ_FOREACH(bs, &bdrv_states, list) {
845 bdrv_close(bs);
846 }
847}
848
Ryan Harperd22b2f42011-03-29 20:51:47 -0500849/* make a BlockDriverState anonymous by removing from bdrv_state list.
850 Also, NULL terminate the device_name to prevent double remove */
851void bdrv_make_anon(BlockDriverState *bs)
852{
853 if (bs->device_name[0] != '\0') {
854 QTAILQ_REMOVE(&bdrv_states, bs, list);
855 }
856 bs->device_name[0] = '\0';
857}
858
bellardb3380822004-03-14 21:38:54 +0000859void bdrv_delete(BlockDriverState *bs)
860{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200861 assert(!bs->dev);
Markus Armbruster18846de2010-06-29 16:58:30 +0200862
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100863 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500864 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000865
bellardb3380822004-03-14 21:38:54 +0000866 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200867 if (bs->file != NULL) {
868 bdrv_delete(bs->file);
869 }
870
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200871 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500872 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000873}
874
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200875int bdrv_attach_dev(BlockDriverState *bs, void *dev)
876/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200877{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200878 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +0200879 return -EBUSY;
880 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200881 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -0300882 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +0200883 return 0;
884}
885
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200886/* TODO qdevified devices don't use this, remove when devices are qdevified */
887void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +0200888{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200889 if (bdrv_attach_dev(bs, dev) < 0) {
890 abort();
891 }
892}
893
894void bdrv_detach_dev(BlockDriverState *bs, void *dev)
895/* TODO change to DeviceState *dev when all users are qdevified */
896{
897 assert(bs->dev == dev);
898 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +0200899 bs->dev_ops = NULL;
900 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +0200901 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +0200902}
903
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200904/* TODO change to return DeviceState * when all users are qdevified */
905void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +0200906{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200907 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +0200908}
909
Markus Armbruster0e49de52011-08-03 15:07:41 +0200910void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
911 void *opaque)
912{
913 bs->dev_ops = ops;
914 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200915 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
916 bs_snapshots = NULL;
917 }
Markus Armbruster0e49de52011-08-03 15:07:41 +0200918}
919
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200920static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +0200921{
Markus Armbruster145feb12011-08-03 15:07:42 +0200922 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200923 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Markus Armbruster145feb12011-08-03 15:07:42 +0200924 }
925}
926
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200927bool bdrv_dev_has_removable_media(BlockDriverState *bs)
928{
929 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
930}
931
Paolo Bonzini025ccaa2011-11-07 17:50:13 +0100932void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
933{
934 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
935 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
936 }
937}
938
Markus Armbrustere4def802011-09-06 18:58:53 +0200939bool bdrv_dev_is_tray_open(BlockDriverState *bs)
940{
941 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
942 return bs->dev_ops->is_tray_open(bs->dev_opaque);
943 }
944 return false;
945}
946
Markus Armbruster145feb12011-08-03 15:07:42 +0200947static void bdrv_dev_resize_cb(BlockDriverState *bs)
948{
949 if (bs->dev_ops && bs->dev_ops->resize_cb) {
950 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +0200951 }
952}
953
Markus Armbrusterf1076392011-09-06 18:58:46 +0200954bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
955{
956 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
957 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
958 }
959 return false;
960}
961
aliguorie97fc192009-04-21 23:11:50 +0000962/*
963 * Run consistency checks on an image
964 *
Kevin Wolfe076f332010-06-29 11:43:13 +0200965 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +0200966 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +0200967 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +0000968 */
Kevin Wolfe076f332010-06-29 11:43:13 +0200969int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +0000970{
971 if (bs->drv->bdrv_check == NULL) {
972 return -ENOTSUP;
973 }
974
Kevin Wolfe076f332010-06-29 11:43:13 +0200975 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +0200976 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +0000977}
978
Kevin Wolf8a426612010-07-16 17:17:01 +0200979#define COMMIT_BUF_SECTORS 2048
980
bellard33e39632003-07-06 17:15:21 +0000981/* commit COW file into the raw image */
982int bdrv_commit(BlockDriverState *bs)
983{
bellard19cb3732006-08-19 11:45:59 +0000984 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +0200985 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +0200986 int64_t sector, total_sectors;
987 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200988 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +0200989 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200990 char filename[1024];
991 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +0000992
bellard19cb3732006-08-19 11:45:59 +0000993 if (!drv)
994 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200995
996 if (!bs->backing_hd) {
997 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +0000998 }
999
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001000 if (bs->backing_hd->keep_read_only) {
1001 return -EACCES;
1002 }
Kevin Wolfee181192010-08-05 13:05:22 +02001003
1004 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001005 ro = bs->backing_hd->read_only;
1006 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1007 open_flags = bs->backing_hd->open_flags;
1008
1009 if (ro) {
1010 /* re-open as RW */
1011 bdrv_delete(bs->backing_hd);
1012 bs->backing_hd = NULL;
1013 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001014 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1015 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001016 if (rw_ret < 0) {
1017 bdrv_delete(bs_rw);
1018 /* try to re-open read-only */
1019 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001020 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1021 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001022 if (ret < 0) {
1023 bdrv_delete(bs_ro);
1024 /* drive not functional anymore */
1025 bs->drv = NULL;
1026 return ret;
1027 }
1028 bs->backing_hd = bs_ro;
1029 return rw_ret;
1030 }
1031 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001032 }
bellardea2384d2004-08-01 21:59:26 +00001033
Jan Kiszka6ea44302009-11-30 18:21:19 +01001034 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001035 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001036
Kevin Wolf8a426612010-07-16 17:17:01 +02001037 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001038 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001039
1040 if (bdrv_read(bs, sector, buf, n) != 0) {
1041 ret = -EIO;
1042 goto ro_cleanup;
1043 }
1044
1045 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1046 ret = -EIO;
1047 goto ro_cleanup;
1048 }
bellardea2384d2004-08-01 21:59:26 +00001049 }
1050 }
bellard95389c82005-12-18 18:28:15 +00001051
Christoph Hellwig1d449522010-01-17 12:32:30 +01001052 if (drv->bdrv_make_empty) {
1053 ret = drv->bdrv_make_empty(bs);
1054 bdrv_flush(bs);
1055 }
bellard95389c82005-12-18 18:28:15 +00001056
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001057 /*
1058 * Make sure all data we wrote to the backing device is actually
1059 * stable on disk.
1060 */
1061 if (bs->backing_hd)
1062 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001063
1064ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001065 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001066
1067 if (ro) {
1068 /* re-open as RO */
1069 bdrv_delete(bs->backing_hd);
1070 bs->backing_hd = NULL;
1071 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001072 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1073 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001074 if (ret < 0) {
1075 bdrv_delete(bs_ro);
1076 /* drive not functional anymore */
1077 bs->drv = NULL;
1078 return ret;
1079 }
1080 bs->backing_hd = bs_ro;
1081 bs->backing_hd->keep_read_only = 0;
1082 }
1083
Christoph Hellwig1d449522010-01-17 12:32:30 +01001084 return ret;
bellard33e39632003-07-06 17:15:21 +00001085}
1086
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001087void bdrv_commit_all(void)
1088{
1089 BlockDriverState *bs;
1090
1091 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1092 bdrv_commit(bs);
1093 }
1094}
1095
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001096struct BdrvTrackedRequest {
1097 BlockDriverState *bs;
1098 int64_t sector_num;
1099 int nb_sectors;
1100 bool is_write;
1101 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001102 Coroutine *co; /* owner, used for deadlock detection */
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001103 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001104};
1105
1106/**
1107 * Remove an active request from the tracked requests list
1108 *
1109 * This function should be called when a tracked request is completing.
1110 */
1111static void tracked_request_end(BdrvTrackedRequest *req)
1112{
1113 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001114 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001115}
1116
1117/**
1118 * Add an active request to the tracked requests list
1119 */
1120static void tracked_request_begin(BdrvTrackedRequest *req,
1121 BlockDriverState *bs,
1122 int64_t sector_num,
1123 int nb_sectors, bool is_write)
1124{
1125 *req = (BdrvTrackedRequest){
1126 .bs = bs,
1127 .sector_num = sector_num,
1128 .nb_sectors = nb_sectors,
1129 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001130 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001131 };
1132
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001133 qemu_co_queue_init(&req->wait_queue);
1134
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001135 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1136}
1137
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001138/**
1139 * Round a region to cluster boundaries
1140 */
1141static void round_to_clusters(BlockDriverState *bs,
1142 int64_t sector_num, int nb_sectors,
1143 int64_t *cluster_sector_num,
1144 int *cluster_nb_sectors)
1145{
1146 BlockDriverInfo bdi;
1147
1148 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1149 *cluster_sector_num = sector_num;
1150 *cluster_nb_sectors = nb_sectors;
1151 } else {
1152 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1153 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1154 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1155 nb_sectors, c);
1156 }
1157}
1158
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001159static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1160 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001161 /* aaaa bbbb */
1162 if (sector_num >= req->sector_num + req->nb_sectors) {
1163 return false;
1164 }
1165 /* bbbb aaaa */
1166 if (req->sector_num >= sector_num + nb_sectors) {
1167 return false;
1168 }
1169 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001170}
1171
1172static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1173 int64_t sector_num, int nb_sectors)
1174{
1175 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001176 int64_t cluster_sector_num;
1177 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001178 bool retry;
1179
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001180 /* If we touch the same cluster it counts as an overlap. This guarantees
1181 * that allocating writes will be serialized and not race with each other
1182 * for the same cluster. For example, in copy-on-read it ensures that the
1183 * CoR read and write operations are atomic and guest writes cannot
1184 * interleave between them.
1185 */
1186 round_to_clusters(bs, sector_num, nb_sectors,
1187 &cluster_sector_num, &cluster_nb_sectors);
1188
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001189 do {
1190 retry = false;
1191 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001192 if (tracked_request_overlaps(req, cluster_sector_num,
1193 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001194 /* Hitting this means there was a reentrant request, for
1195 * example, a block driver issuing nested requests. This must
1196 * never happen since it means deadlock.
1197 */
1198 assert(qemu_coroutine_self() != req->co);
1199
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001200 qemu_co_queue_wait(&req->wait_queue);
1201 retry = true;
1202 break;
1203 }
1204 }
1205 } while (retry);
1206}
1207
Kevin Wolf756e6732010-01-12 12:55:17 +01001208/*
1209 * Return values:
1210 * 0 - success
1211 * -EINVAL - backing format specified, but no file
1212 * -ENOSPC - can't update the backing file because no space is left in the
1213 * image file header
1214 * -ENOTSUP - format driver doesn't support changing the backing file
1215 */
1216int bdrv_change_backing_file(BlockDriverState *bs,
1217 const char *backing_file, const char *backing_fmt)
1218{
1219 BlockDriver *drv = bs->drv;
1220
1221 if (drv->bdrv_change_backing_file != NULL) {
1222 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1223 } else {
1224 return -ENOTSUP;
1225 }
1226}
1227
aliguori71d07702009-03-03 17:37:16 +00001228static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1229 size_t size)
1230{
1231 int64_t len;
1232
1233 if (!bdrv_is_inserted(bs))
1234 return -ENOMEDIUM;
1235
1236 if (bs->growable)
1237 return 0;
1238
1239 len = bdrv_getlength(bs);
1240
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001241 if (offset < 0)
1242 return -EIO;
1243
1244 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001245 return -EIO;
1246
1247 return 0;
1248}
1249
1250static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1251 int nb_sectors)
1252{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001253 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1254 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001255}
1256
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001257typedef struct RwCo {
1258 BlockDriverState *bs;
1259 int64_t sector_num;
1260 int nb_sectors;
1261 QEMUIOVector *qiov;
1262 bool is_write;
1263 int ret;
1264} RwCo;
1265
1266static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1267{
1268 RwCo *rwco = opaque;
1269
1270 if (!rwco->is_write) {
1271 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1272 rwco->nb_sectors, rwco->qiov);
1273 } else {
1274 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1275 rwco->nb_sectors, rwco->qiov);
1276 }
1277}
1278
1279/*
1280 * Process a synchronous request using coroutines
1281 */
1282static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1283 int nb_sectors, bool is_write)
1284{
1285 QEMUIOVector qiov;
1286 struct iovec iov = {
1287 .iov_base = (void *)buf,
1288 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1289 };
1290 Coroutine *co;
1291 RwCo rwco = {
1292 .bs = bs,
1293 .sector_num = sector_num,
1294 .nb_sectors = nb_sectors,
1295 .qiov = &qiov,
1296 .is_write = is_write,
1297 .ret = NOT_DONE,
1298 };
1299
1300 qemu_iovec_init_external(&qiov, &iov, 1);
1301
1302 if (qemu_in_coroutine()) {
1303 /* Fast-path if already in coroutine context */
1304 bdrv_rw_co_entry(&rwco);
1305 } else {
1306 co = qemu_coroutine_create(bdrv_rw_co_entry);
1307 qemu_coroutine_enter(co, &rwco);
1308 while (rwco.ret == NOT_DONE) {
1309 qemu_aio_wait();
1310 }
1311 }
1312 return rwco.ret;
1313}
1314
bellard19cb3732006-08-19 11:45:59 +00001315/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001316int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001317 uint8_t *buf, int nb_sectors)
1318{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001319 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001320}
1321
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001322static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001323 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001324{
1325 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001326 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001327
Jan Kiszka6ea44302009-11-30 18:21:19 +01001328 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001329 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001330
1331 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001332 idx = start / (sizeof(unsigned long) * 8);
1333 bit = start % (sizeof(unsigned long) * 8);
1334 val = bs->dirty_bitmap[idx];
1335 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001336 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001337 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001338 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001339 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001340 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001341 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001342 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001343 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001344 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001345 }
1346 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001347 }
1348}
1349
ths5fafdf22007-09-16 21:08:06 +00001350/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001351 -EIO generic I/O error (may happen for all errors)
1352 -ENOMEDIUM No media inserted.
1353 -EINVAL Invalid sector number or nb_sectors
1354 -EACCES Trying to write a read-only device
1355*/
ths5fafdf22007-09-16 21:08:06 +00001356int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001357 const uint8_t *buf, int nb_sectors)
1358{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001359 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001360}
1361
aliguorieda578e2009-03-12 19:57:16 +00001362int bdrv_pread(BlockDriverState *bs, int64_t offset,
1363 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001364{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001365 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001366 int len, nb_sectors, count;
1367 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001368 int ret;
bellard83f64092006-08-01 16:21:11 +00001369
1370 count = count1;
1371 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001372 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001373 if (len > count)
1374 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001375 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001376 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001377 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1378 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001379 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001380 count -= len;
1381 if (count == 0)
1382 return count1;
1383 sector_num++;
1384 buf += len;
1385 }
1386
1387 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001388 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001389 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001390 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1391 return ret;
bellard83f64092006-08-01 16:21:11 +00001392 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001393 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001394 buf += len;
1395 count -= len;
1396 }
1397
1398 /* add data from the last sector */
1399 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001400 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1401 return ret;
bellard83f64092006-08-01 16:21:11 +00001402 memcpy(buf, tmp_buf, count);
1403 }
1404 return count1;
1405}
1406
aliguorieda578e2009-03-12 19:57:16 +00001407int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1408 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001409{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001410 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001411 int len, nb_sectors, count;
1412 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001413 int ret;
bellard83f64092006-08-01 16:21:11 +00001414
1415 count = count1;
1416 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001417 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001418 if (len > count)
1419 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001420 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001421 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001422 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1423 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001424 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001425 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1426 return ret;
bellard83f64092006-08-01 16:21:11 +00001427 count -= len;
1428 if (count == 0)
1429 return count1;
1430 sector_num++;
1431 buf += len;
1432 }
1433
1434 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001435 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001436 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001437 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1438 return ret;
bellard83f64092006-08-01 16:21:11 +00001439 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001440 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001441 buf += len;
1442 count -= len;
1443 }
1444
1445 /* add data from the last sector */
1446 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001447 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1448 return ret;
bellard83f64092006-08-01 16:21:11 +00001449 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001450 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1451 return ret;
bellard83f64092006-08-01 16:21:11 +00001452 }
1453 return count1;
1454}
bellard83f64092006-08-01 16:21:11 +00001455
Kevin Wolff08145f2010-06-16 16:38:15 +02001456/*
1457 * Writes to the file and ensures that no writes are reordered across this
1458 * request (acts as a barrier)
1459 *
1460 * Returns 0 on success, -errno in error cases.
1461 */
1462int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1463 const void *buf, int count)
1464{
1465 int ret;
1466
1467 ret = bdrv_pwrite(bs, offset, buf, count);
1468 if (ret < 0) {
1469 return ret;
1470 }
1471
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001472 /* No flush needed for cache modes that use O_DSYNC */
1473 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001474 bdrv_flush(bs);
1475 }
1476
1477 return 0;
1478}
1479
Stefan Hajnocziab185922011-11-17 13:40:31 +00001480static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1481 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1482{
1483 /* Perform I/O through a temporary buffer so that users who scribble over
1484 * their read buffer while the operation is in progress do not end up
1485 * modifying the image file. This is critical for zero-copy guest I/O
1486 * where anything might happen inside guest memory.
1487 */
1488 void *bounce_buffer;
1489
1490 struct iovec iov;
1491 QEMUIOVector bounce_qiov;
1492 int64_t cluster_sector_num;
1493 int cluster_nb_sectors;
1494 size_t skip_bytes;
1495 int ret;
1496
1497 /* Cover entire cluster so no additional backing file I/O is required when
1498 * allocating cluster in the image file.
1499 */
1500 round_to_clusters(bs, sector_num, nb_sectors,
1501 &cluster_sector_num, &cluster_nb_sectors);
1502
1503 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
1504 cluster_sector_num, cluster_nb_sectors);
1505
1506 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1507 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1508 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1509
1510 ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1511 &bounce_qiov);
1512 if (ret < 0) {
1513 goto err;
1514 }
1515
1516 ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
1517 &bounce_qiov);
1518 if (ret < 0) {
1519 /* It might be okay to ignore write errors for guest requests. If this
1520 * is a deliberate copy-on-read then we don't want to ignore the error.
1521 * Simply report it in all cases.
1522 */
1523 goto err;
1524 }
1525
1526 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1527 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1528 nb_sectors * BDRV_SECTOR_SIZE);
1529
1530err:
1531 qemu_vfree(bounce_buffer);
1532 return ret;
1533}
1534
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001535/*
1536 * Handle a read request in coroutine context
1537 */
1538static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1539 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001540{
1541 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001542 BdrvTrackedRequest req;
1543 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001544
Kevin Wolfda1fa912011-07-14 17:27:13 +02001545 if (!drv) {
1546 return -ENOMEDIUM;
1547 }
1548 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1549 return -EIO;
1550 }
1551
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001552 /* throttling disk read I/O */
1553 if (bs->io_limits_enabled) {
1554 bdrv_io_limits_intercept(bs, false, nb_sectors);
1555 }
1556
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001557 if (bs->copy_on_read) {
1558 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1559 }
1560
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001561 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001562
1563 if (bs->copy_on_read) {
1564 int pnum;
1565
1566 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1567 if (ret < 0) {
1568 goto out;
1569 }
1570
1571 if (!ret || pnum != nb_sectors) {
1572 ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
1573 goto out;
1574 }
1575 }
1576
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001577 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001578
1579out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001580 tracked_request_end(&req);
1581 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001582}
1583
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001584int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001585 int nb_sectors, QEMUIOVector *qiov)
1586{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001587 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001588
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001589 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1590}
1591
1592/*
1593 * Handle a write request in coroutine context
1594 */
1595static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1596 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1597{
1598 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001599 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001600 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001601
1602 if (!bs->drv) {
1603 return -ENOMEDIUM;
1604 }
1605 if (bs->read_only) {
1606 return -EACCES;
1607 }
1608 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1609 return -EIO;
1610 }
1611
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001612 /* throttling disk write I/O */
1613 if (bs->io_limits_enabled) {
1614 bdrv_io_limits_intercept(bs, true, nb_sectors);
1615 }
1616
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001617 if (bs->copy_on_read) {
1618 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1619 }
1620
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001621 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1622
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001623 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1624
Kevin Wolfda1fa912011-07-14 17:27:13 +02001625 if (bs->dirty_bitmap) {
1626 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1627 }
1628
1629 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1630 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1631 }
1632
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001633 tracked_request_end(&req);
1634
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001635 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001636}
1637
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001638int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1639 int nb_sectors, QEMUIOVector *qiov)
1640{
1641 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1642
1643 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1644}
1645
bellard83f64092006-08-01 16:21:11 +00001646/**
bellard83f64092006-08-01 16:21:11 +00001647 * Truncate file to 'offset' bytes (needed only for file protocols)
1648 */
1649int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1650{
1651 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001652 int ret;
bellard83f64092006-08-01 16:21:11 +00001653 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001654 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001655 if (!drv->bdrv_truncate)
1656 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001657 if (bs->read_only)
1658 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001659 if (bdrv_in_use(bs))
1660 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001661 ret = drv->bdrv_truncate(bs, offset);
1662 if (ret == 0) {
1663 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001664 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001665 }
1666 return ret;
bellard83f64092006-08-01 16:21:11 +00001667}
1668
1669/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001670 * Length of a allocated file in bytes. Sparse files are counted by actual
1671 * allocated space. Return < 0 if error or unknown.
1672 */
1673int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1674{
1675 BlockDriver *drv = bs->drv;
1676 if (!drv) {
1677 return -ENOMEDIUM;
1678 }
1679 if (drv->bdrv_get_allocated_file_size) {
1680 return drv->bdrv_get_allocated_file_size(bs);
1681 }
1682 if (bs->file) {
1683 return bdrv_get_allocated_file_size(bs->file);
1684 }
1685 return -ENOTSUP;
1686}
1687
1688/**
bellard83f64092006-08-01 16:21:11 +00001689 * Length of a file in bytes. Return < 0 if error or unknown.
1690 */
1691int64_t bdrv_getlength(BlockDriverState *bs)
1692{
1693 BlockDriver *drv = bs->drv;
1694 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001695 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001696
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001697 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001698 if (drv->bdrv_getlength) {
1699 return drv->bdrv_getlength(bs);
1700 }
bellard83f64092006-08-01 16:21:11 +00001701 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001702 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001703}
1704
bellard19cb3732006-08-19 11:45:59 +00001705/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001706void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001707{
bellard19cb3732006-08-19 11:45:59 +00001708 int64_t length;
1709 length = bdrv_getlength(bs);
1710 if (length < 0)
1711 length = 0;
1712 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001713 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001714 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001715}
bellardcf989512004-02-16 21:56:36 +00001716
/*
 * One entry of the MSDOS partition table, as read from the on-disk sector
 * by guess_disk_lchs().  Packed so the layout matches the raw bytes.
 */
struct partition {
    uint8_t boot_ind;       /* boot indicator; 0x80 means active */
    uint8_t head;           /* first head */
    uint8_t sector;         /* first sector */
    uint8_t cyl;            /* first cylinder */
    uint8_t sys_ind;        /* partition type */
    uint8_t end_head;       /* last head */
    uint8_t end_sector;     /* last sector */
    uint8_t end_cyl;        /* last cylinder */
    uint32_t start_sect;    /* first sector, counting from 0 */
    uint32_t nr_sects;      /* number of sectors in the partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001729
1730/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1731static int guess_disk_lchs(BlockDriverState *bs,
1732 int *pcylinders, int *pheads, int *psectors)
1733{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001734 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001735 int ret, i, heads, sectors, cylinders;
1736 struct partition *p;
1737 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001738 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001739
1740 bdrv_get_geometry(bs, &nb_sectors);
1741
1742 ret = bdrv_read(bs, 0, buf, 1);
1743 if (ret < 0)
1744 return -1;
1745 /* test msdos magic */
1746 if (buf[510] != 0x55 || buf[511] != 0xaa)
1747 return -1;
1748 for(i = 0; i < 4; i++) {
1749 p = ((struct partition *)(buf + 0x1be)) + i;
1750 nr_sects = le32_to_cpu(p->nr_sects);
1751 if (nr_sects && p->end_head) {
1752 /* We make the assumption that the partition terminates on
1753 a cylinder boundary */
1754 heads = p->end_head + 1;
1755 sectors = p->end_sector & 63;
1756 if (sectors == 0)
1757 continue;
1758 cylinders = nb_sectors / (heads * sectors);
1759 if (cylinders < 1 || cylinders > 16383)
1760 continue;
1761 *pheads = heads;
1762 *psectors = sectors;
1763 *pcylinders = cylinders;
1764#if 0
1765 printf("guessed geometry: LCHS=%d %d %d\n",
1766 cylinders, heads, sectors);
1767#endif
1768 return 0;
1769 }
1770 }
1771 return -1;
1772}
1773
1774void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1775{
1776 int translation, lba_detected = 0;
1777 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00001778 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001779
1780 /* if a geometry hint is available, use it */
1781 bdrv_get_geometry(bs, &nb_sectors);
1782 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1783 translation = bdrv_get_translation_hint(bs);
1784 if (cylinders != 0) {
1785 *pcyls = cylinders;
1786 *pheads = heads;
1787 *psecs = secs;
1788 } else {
1789 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1790 if (heads > 16) {
1791 /* if heads > 16, it means that a BIOS LBA
1792 translation was active, so the default
1793 hardware geometry is OK */
1794 lba_detected = 1;
1795 goto default_geometry;
1796 } else {
1797 *pcyls = cylinders;
1798 *pheads = heads;
1799 *psecs = secs;
1800 /* disable any translation to be in sync with
1801 the logical geometry */
1802 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1803 bdrv_set_translation_hint(bs,
1804 BIOS_ATA_TRANSLATION_NONE);
1805 }
1806 }
1807 } else {
1808 default_geometry:
1809 /* if no geometry, use a standard physical disk geometry */
1810 cylinders = nb_sectors / (16 * 63);
1811
1812 if (cylinders > 16383)
1813 cylinders = 16383;
1814 else if (cylinders < 2)
1815 cylinders = 2;
1816 *pcyls = cylinders;
1817 *pheads = 16;
1818 *psecs = 63;
1819 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1820 if ((*pcyls * *pheads) <= 131072) {
1821 bdrv_set_translation_hint(bs,
1822 BIOS_ATA_TRANSLATION_LARGE);
1823 } else {
1824 bdrv_set_translation_hint(bs,
1825 BIOS_ATA_TRANSLATION_LBA);
1826 }
1827 }
1828 }
1829 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1830 }
1831}
1832
ths5fafdf22007-09-16 21:08:06 +00001833void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001834 int cyls, int heads, int secs)
1835{
1836 bs->cyls = cyls;
1837 bs->heads = heads;
1838 bs->secs = secs;
1839}
1840
bellard46d47672004-11-16 01:45:27 +00001841void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1842{
1843 bs->translation = translation;
1844}
1845
ths5fafdf22007-09-16 21:08:06 +00001846void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001847 int *pcyls, int *pheads, int *psecs)
1848{
1849 *pcyls = bs->cyls;
1850 *pheads = bs->heads;
1851 *psecs = bs->secs;
1852}
1853
Zhi Yong Wu0563e192011-11-03 16:57:25 +08001854/* throttling disk io limits */
1855void bdrv_set_io_limits(BlockDriverState *bs,
1856 BlockIOLimit *io_limits)
1857{
1858 bs->io_limits = *io_limits;
1859 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1860}
1861
Blue Swirl5bbdbb42011-02-12 20:43:32 +00001862/* Recognize floppy formats */
1863typedef struct FDFormat {
1864 FDriveType drive;
1865 uint8_t last_sect;
1866 uint8_t max_track;
1867 uint8_t max_head;
1868} FDFormat;
1869
1870static const FDFormat fd_formats[] = {
1871 /* First entry is default format */
1872 /* 1.44 MB 3"1/2 floppy disks */
1873 { FDRIVE_DRV_144, 18, 80, 1, },
1874 { FDRIVE_DRV_144, 20, 80, 1, },
1875 { FDRIVE_DRV_144, 21, 80, 1, },
1876 { FDRIVE_DRV_144, 21, 82, 1, },
1877 { FDRIVE_DRV_144, 21, 83, 1, },
1878 { FDRIVE_DRV_144, 22, 80, 1, },
1879 { FDRIVE_DRV_144, 23, 80, 1, },
1880 { FDRIVE_DRV_144, 24, 80, 1, },
1881 /* 2.88 MB 3"1/2 floppy disks */
1882 { FDRIVE_DRV_288, 36, 80, 1, },
1883 { FDRIVE_DRV_288, 39, 80, 1, },
1884 { FDRIVE_DRV_288, 40, 80, 1, },
1885 { FDRIVE_DRV_288, 44, 80, 1, },
1886 { FDRIVE_DRV_288, 48, 80, 1, },
1887 /* 720 kB 3"1/2 floppy disks */
1888 { FDRIVE_DRV_144, 9, 80, 1, },
1889 { FDRIVE_DRV_144, 10, 80, 1, },
1890 { FDRIVE_DRV_144, 10, 82, 1, },
1891 { FDRIVE_DRV_144, 10, 83, 1, },
1892 { FDRIVE_DRV_144, 13, 80, 1, },
1893 { FDRIVE_DRV_144, 14, 80, 1, },
1894 /* 1.2 MB 5"1/4 floppy disks */
1895 { FDRIVE_DRV_120, 15, 80, 1, },
1896 { FDRIVE_DRV_120, 18, 80, 1, },
1897 { FDRIVE_DRV_120, 18, 82, 1, },
1898 { FDRIVE_DRV_120, 18, 83, 1, },
1899 { FDRIVE_DRV_120, 20, 80, 1, },
1900 /* 720 kB 5"1/4 floppy disks */
1901 { FDRIVE_DRV_120, 9, 80, 1, },
1902 { FDRIVE_DRV_120, 11, 80, 1, },
1903 /* 360 kB 5"1/4 floppy disks */
1904 { FDRIVE_DRV_120, 9, 40, 1, },
1905 { FDRIVE_DRV_120, 9, 40, 0, },
1906 { FDRIVE_DRV_120, 10, 41, 1, },
1907 { FDRIVE_DRV_120, 10, 42, 1, },
1908 /* 320 kB 5"1/4 floppy disks */
1909 { FDRIVE_DRV_120, 8, 40, 1, },
1910 { FDRIVE_DRV_120, 8, 40, 0, },
1911 /* 360 kB must match 5"1/4 better than 3"1/2... */
1912 { FDRIVE_DRV_144, 9, 80, 0, },
1913 /* end */
1914 { FDRIVE_DRV_NONE, -1, -1, 0, },
1915};
1916
1917void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1918 int *max_track, int *last_sect,
1919 FDriveType drive_in, FDriveType *drive)
1920{
1921 const FDFormat *parse;
1922 uint64_t nb_sectors, size;
1923 int i, first_match, match;
1924
1925 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1926 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1927 /* User defined disk */
1928 } else {
1929 bdrv_get_geometry(bs, &nb_sectors);
1930 match = -1;
1931 first_match = -1;
1932 for (i = 0; ; i++) {
1933 parse = &fd_formats[i];
1934 if (parse->drive == FDRIVE_DRV_NONE) {
1935 break;
1936 }
1937 if (drive_in == parse->drive ||
1938 drive_in == FDRIVE_DRV_NONE) {
1939 size = (parse->max_head + 1) * parse->max_track *
1940 parse->last_sect;
1941 if (nb_sectors == size) {
1942 match = i;
1943 break;
1944 }
1945 if (first_match == -1) {
1946 first_match = i;
1947 }
1948 }
1949 }
1950 if (match == -1) {
1951 if (first_match == -1) {
1952 match = 1;
1953 } else {
1954 match = first_match;
1955 }
1956 parse = &fd_formats[match];
1957 }
1958 *nb_heads = parse->max_head + 1;
1959 *max_track = parse->max_track;
1960 *last_sect = parse->last_sect;
1961 *drive = parse->drive;
1962 }
1963}
1964
bellard46d47672004-11-16 01:45:27 +00001965int bdrv_get_translation_hint(BlockDriverState *bs)
1966{
1967 return bs->translation;
1968}
1969
Markus Armbrusterabd7f682010-06-02 18:55:17 +02001970void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1971 BlockErrorAction on_write_error)
1972{
1973 bs->on_read_error = on_read_error;
1974 bs->on_write_error = on_write_error;
1975}
1976
1977BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1978{
1979 return is_read ? bs->on_read_error : bs->on_write_error;
1980}
1981
bellardb3380822004-03-14 21:38:54 +00001982int bdrv_is_read_only(BlockDriverState *bs)
1983{
1984 return bs->read_only;
1985}
1986
ths985a03b2007-12-24 16:10:43 +00001987int bdrv_is_sg(BlockDriverState *bs)
1988{
1989 return bs->sg;
1990}
1991
Christoph Hellwige900a7b2009-09-04 19:01:15 +02001992int bdrv_enable_write_cache(BlockDriverState *bs)
1993{
1994 return bs->enable_write_cache;
1995}
1996
bellardea2384d2004-08-01 21:59:26 +00001997int bdrv_is_encrypted(BlockDriverState *bs)
1998{
1999 if (bs->backing_hd && bs->backing_hd->encrypted)
2000 return 1;
2001 return bs->encrypted;
2002}
2003
aliguoric0f4ce72009-03-05 23:01:01 +00002004int bdrv_key_required(BlockDriverState *bs)
2005{
2006 BlockDriverState *backing_hd = bs->backing_hd;
2007
2008 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2009 return 1;
2010 return (bs->encrypted && !bs->valid_key);
2011}
2012
bellardea2384d2004-08-01 21:59:26 +00002013int bdrv_set_key(BlockDriverState *bs, const char *key)
2014{
2015 int ret;
2016 if (bs->backing_hd && bs->backing_hd->encrypted) {
2017 ret = bdrv_set_key(bs->backing_hd, key);
2018 if (ret < 0)
2019 return ret;
2020 if (!bs->encrypted)
2021 return 0;
2022 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002023 if (!bs->encrypted) {
2024 return -EINVAL;
2025 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2026 return -ENOMEDIUM;
2027 }
aliguoric0f4ce72009-03-05 23:01:01 +00002028 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002029 if (ret < 0) {
2030 bs->valid_key = 0;
2031 } else if (!bs->valid_key) {
2032 bs->valid_key = 1;
2033 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002034 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002035 }
aliguoric0f4ce72009-03-05 23:01:01 +00002036 return ret;
bellardea2384d2004-08-01 21:59:26 +00002037}
2038
2039void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2040{
bellard19cb3732006-08-19 11:45:59 +00002041 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002042 buf[0] = '\0';
2043 } else {
2044 pstrcpy(buf, buf_size, bs->drv->format_name);
2045 }
2046}
2047
ths5fafdf22007-09-16 21:08:06 +00002048void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002049 void *opaque)
2050{
2051 BlockDriver *drv;
2052
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002053 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002054 it(opaque, drv->format_name);
2055 }
2056}
2057
bellardb3380822004-03-14 21:38:54 +00002058BlockDriverState *bdrv_find(const char *name)
2059{
2060 BlockDriverState *bs;
2061
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002062 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2063 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002064 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002065 }
bellardb3380822004-03-14 21:38:54 +00002066 }
2067 return NULL;
2068}
2069
Markus Armbruster2f399b02010-06-02 18:55:20 +02002070BlockDriverState *bdrv_next(BlockDriverState *bs)
2071{
2072 if (!bs) {
2073 return QTAILQ_FIRST(&bdrv_states);
2074 }
2075 return QTAILQ_NEXT(bs, list);
2076}
2077
aliguori51de9762009-03-05 23:00:43 +00002078void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002079{
2080 BlockDriverState *bs;
2081
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002082 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002083 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002084 }
2085}
2086
bellardea2384d2004-08-01 21:59:26 +00002087const char *bdrv_get_device_name(BlockDriverState *bs)
2088{
2089 return bs->device_name;
2090}
2091
aliguoric6ca28d2008-10-06 13:55:43 +00002092void bdrv_flush_all(void)
2093{
2094 BlockDriverState *bs;
2095
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002096 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Markus Armbrusterc602a482011-08-03 15:08:10 +02002097 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
aliguoric6ca28d2008-10-06 13:55:43 +00002098 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002099 }
2100 }
aliguoric6ca28d2008-10-06 13:55:43 +00002101}
2102
Kevin Wolff2feebb2010-04-14 17:30:35 +02002103int bdrv_has_zero_init(BlockDriverState *bs)
2104{
2105 assert(bs->drv);
2106
Kevin Wolf336c1c12010-07-28 11:26:29 +02002107 if (bs->drv->bdrv_has_zero_init) {
2108 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002109 }
2110
2111 return 1;
2112}
2113
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002114typedef struct BdrvCoIsAllocatedData {
2115 BlockDriverState *bs;
2116 int64_t sector_num;
2117 int nb_sectors;
2118 int *pnum;
2119 int ret;
2120 bool done;
2121} BdrvCoIsAllocatedData;
2122
thsf58c7b32008-06-05 21:53:49 +00002123/*
2124 * Returns true iff the specified sector is present in the disk image. Drivers
2125 * not implementing the functionality are assumed to not support backing files,
2126 * hence all their sectors are reported as allocated.
2127 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002128 * If 'sector_num' is beyond the end of the disk image the return value is 0
2129 * and 'pnum' is set to 0.
2130 *
thsf58c7b32008-06-05 21:53:49 +00002131 * 'pnum' is set to the number of sectors (including and immediately following
2132 * the specified sector) that are known to be in the same
2133 * allocated/unallocated state.
2134 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002135 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2136 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002137 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002138int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2139 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002140{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002141 int64_t n;
2142
2143 if (sector_num >= bs->total_sectors) {
2144 *pnum = 0;
2145 return 0;
2146 }
2147
2148 n = bs->total_sectors - sector_num;
2149 if (n < nb_sectors) {
2150 nb_sectors = n;
2151 }
2152
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002153 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002154 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002155 return 1;
2156 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002157
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002158 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2159}
2160
2161/* Coroutine wrapper for bdrv_is_allocated() */
2162static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2163{
2164 BdrvCoIsAllocatedData *data = opaque;
2165 BlockDriverState *bs = data->bs;
2166
2167 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2168 data->pnum);
2169 data->done = true;
2170}
2171
2172/*
2173 * Synchronous wrapper around bdrv_co_is_allocated().
2174 *
2175 * See bdrv_co_is_allocated() for details.
2176 */
2177int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2178 int *pnum)
2179{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002180 Coroutine *co;
2181 BdrvCoIsAllocatedData data = {
2182 .bs = bs,
2183 .sector_num = sector_num,
2184 .nb_sectors = nb_sectors,
2185 .pnum = pnum,
2186 .done = false,
2187 };
2188
2189 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2190 qemu_coroutine_enter(co, &data);
2191 while (!data.done) {
2192 qemu_aio_wait();
2193 }
2194 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002195}
2196
Luiz Capitulino2582bfe2010-02-03 12:41:01 -02002197void bdrv_mon_event(const BlockDriverState *bdrv,
2198 BlockMonEventAction action, int is_read)
2199{
2200 QObject *data;
2201 const char *action_str;
2202
2203 switch (action) {
2204 case BDRV_ACTION_REPORT:
2205 action_str = "report";
2206 break;
2207 case BDRV_ACTION_IGNORE:
2208 action_str = "ignore";
2209 break;
2210 case BDRV_ACTION_STOP:
2211 action_str = "stop";
2212 break;
2213 default:
2214 abort();
2215 }
2216
2217 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2218 bdrv->device_name,
2219 action_str,
2220 is_read ? "read" : "write");
2221 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2222
2223 qobject_decref(data);
2224}
2225
Luiz Capitulinob2023812011-09-21 17:16:47 -03002226BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002227{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002228 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002229 BlockDriverState *bs;
2230
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002231 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002232 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002233
Luiz Capitulinob2023812011-09-21 17:16:47 -03002234 info->value = g_malloc0(sizeof(*info->value));
2235 info->value->device = g_strdup(bs->device_name);
2236 info->value->type = g_strdup("unknown");
2237 info->value->locked = bdrv_dev_is_medium_locked(bs);
2238 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002239
Markus Armbrustere4def802011-09-06 18:58:53 +02002240 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002241 info->value->has_tray_open = true;
2242 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002243 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002244
2245 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002246 info->value->has_io_status = true;
2247 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002248 }
2249
bellard19cb3732006-08-19 11:45:59 +00002250 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002251 info->value->has_inserted = true;
2252 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2253 info->value->inserted->file = g_strdup(bs->filename);
2254 info->value->inserted->ro = bs->read_only;
2255 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2256 info->value->inserted->encrypted = bs->encrypted;
2257 if (bs->backing_file[0]) {
2258 info->value->inserted->has_backing_file = true;
2259 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002260 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002261
2262 if (bs->io_limits_enabled) {
2263 info->value->inserted->bps =
2264 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2265 info->value->inserted->bps_rd =
2266 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2267 info->value->inserted->bps_wr =
2268 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2269 info->value->inserted->iops =
2270 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2271 info->value->inserted->iops_rd =
2272 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2273 info->value->inserted->iops_wr =
2274 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2275 }
bellardb3380822004-03-14 21:38:54 +00002276 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002277
2278 /* XXX: waiting for the qapi to support GSList */
2279 if (!cur_item) {
2280 head = cur_item = info;
2281 } else {
2282 cur_item->next = info;
2283 cur_item = info;
2284 }
bellardb3380822004-03-14 21:38:54 +00002285 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002286
Luiz Capitulinob2023812011-09-21 17:16:47 -03002287 return head;
bellardb3380822004-03-14 21:38:54 +00002288}
thsa36e69d2007-12-02 05:18:19 +00002289
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002290/* Consider exposing this as a full fledged QMP command */
2291static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002292{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002293 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002294
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002295 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002296
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002297 if (bs->device_name[0]) {
2298 s->has_device = true;
2299 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002300 }
2301
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002302 s->stats = g_malloc0(sizeof(*s->stats));
2303 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2304 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2305 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2306 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2307 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2308 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2309 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2310 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2311 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2312
Kevin Wolf294cc352010-04-28 14:34:01 +02002313 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002314 s->has_parent = true;
2315 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002316 }
2317
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002318 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002319}
2320
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002321BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002322{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002323 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002324 BlockDriverState *bs;
2325
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002326 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002327 BlockStatsList *info = g_malloc0(sizeof(*info));
2328 info->value = qmp_query_blockstat(bs, NULL);
2329
2330 /* XXX: waiting for the qapi to support GSList */
2331 if (!cur_item) {
2332 head = cur_item = info;
2333 } else {
2334 cur_item->next = info;
2335 cur_item = info;
2336 }
thsa36e69d2007-12-02 05:18:19 +00002337 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002338
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002339 return head;
thsa36e69d2007-12-02 05:18:19 +00002340}
bellardea2384d2004-08-01 21:59:26 +00002341
aliguori045df332009-03-05 23:00:48 +00002342const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2343{
2344 if (bs->backing_hd && bs->backing_hd->encrypted)
2345 return bs->backing_file;
2346 else if (bs->encrypted)
2347 return bs->filename;
2348 else
2349 return NULL;
2350}
2351
ths5fafdf22007-09-16 21:08:06 +00002352void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002353 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002354{
Kevin Wolf3574c602011-10-26 11:02:11 +02002355 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002356}
2357
ths5fafdf22007-09-16 21:08:06 +00002358int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002359 const uint8_t *buf, int nb_sectors)
2360{
2361 BlockDriver *drv = bs->drv;
2362 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002363 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002364 if (!drv->bdrv_write_compressed)
2365 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002366 if (bdrv_check_request(bs, sector_num, nb_sectors))
2367 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002368
Jan Kiszkac6d22832009-11-30 18:21:20 +01002369 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002370 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2371 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002372
bellardfaea38e2006-08-05 21:31:00 +00002373 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2374}
ths3b46e622007-09-17 08:09:54 +00002375
bellardfaea38e2006-08-05 21:31:00 +00002376int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2377{
2378 BlockDriver *drv = bs->drv;
2379 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002380 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002381 if (!drv->bdrv_get_info)
2382 return -ENOTSUP;
2383 memset(bdi, 0, sizeof(*bdi));
2384 return drv->bdrv_get_info(bs, bdi);
2385}
2386
Christoph Hellwig45566e92009-07-10 23:11:57 +02002387int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2388 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002389{
2390 BlockDriver *drv = bs->drv;
2391 if (!drv)
2392 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002393 if (drv->bdrv_save_vmstate)
2394 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2395 if (bs->file)
2396 return bdrv_save_vmstate(bs->file, buf, pos, size);
2397 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002398}
2399
Christoph Hellwig45566e92009-07-10 23:11:57 +02002400int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2401 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002402{
2403 BlockDriver *drv = bs->drv;
2404 if (!drv)
2405 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002406 if (drv->bdrv_load_vmstate)
2407 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2408 if (bs->file)
2409 return bdrv_load_vmstate(bs->file, buf, pos, size);
2410 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002411}
2412
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002413void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2414{
2415 BlockDriver *drv = bs->drv;
2416
2417 if (!drv || !drv->bdrv_debug_event) {
2418 return;
2419 }
2420
2421 return drv->bdrv_debug_event(bs, event);
2422
2423}
2424
bellardfaea38e2006-08-05 21:31:00 +00002425/**************************************************************/
2426/* handling of snapshots */
2427
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002428int bdrv_can_snapshot(BlockDriverState *bs)
2429{
2430 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002431 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002432 return 0;
2433 }
2434
2435 if (!drv->bdrv_snapshot_create) {
2436 if (bs->file != NULL) {
2437 return bdrv_can_snapshot(bs->file);
2438 }
2439 return 0;
2440 }
2441
2442 return 1;
2443}
2444
Blue Swirl199630b2010-07-25 20:49:34 +00002445int bdrv_is_snapshot(BlockDriverState *bs)
2446{
2447 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2448}
2449
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002450BlockDriverState *bdrv_snapshots(void)
2451{
2452 BlockDriverState *bs;
2453
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002454 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002455 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002456 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002457
2458 bs = NULL;
2459 while ((bs = bdrv_next(bs))) {
2460 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002461 bs_snapshots = bs;
2462 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002463 }
2464 }
2465 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002466}
2467
ths5fafdf22007-09-16 21:08:06 +00002468int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002469 QEMUSnapshotInfo *sn_info)
2470{
2471 BlockDriver *drv = bs->drv;
2472 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002473 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002474 if (drv->bdrv_snapshot_create)
2475 return drv->bdrv_snapshot_create(bs, sn_info);
2476 if (bs->file)
2477 return bdrv_snapshot_create(bs->file, sn_info);
2478 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002479}
2480
ths5fafdf22007-09-16 21:08:06 +00002481int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002482 const char *snapshot_id)
2483{
2484 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002485 int ret, open_ret;
2486
bellardfaea38e2006-08-05 21:31:00 +00002487 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002488 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002489 if (drv->bdrv_snapshot_goto)
2490 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2491
2492 if (bs->file) {
2493 drv->bdrv_close(bs);
2494 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2495 open_ret = drv->bdrv_open(bs, bs->open_flags);
2496 if (open_ret < 0) {
2497 bdrv_delete(bs->file);
2498 bs->drv = NULL;
2499 return open_ret;
2500 }
2501 return ret;
2502 }
2503
2504 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002505}
2506
2507int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2508{
2509 BlockDriver *drv = bs->drv;
2510 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002511 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002512 if (drv->bdrv_snapshot_delete)
2513 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2514 if (bs->file)
2515 return bdrv_snapshot_delete(bs->file, snapshot_id);
2516 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002517}
2518
ths5fafdf22007-09-16 21:08:06 +00002519int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002520 QEMUSnapshotInfo **psn_info)
2521{
2522 BlockDriver *drv = bs->drv;
2523 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002524 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002525 if (drv->bdrv_snapshot_list)
2526 return drv->bdrv_snapshot_list(bs, psn_info);
2527 if (bs->file)
2528 return bdrv_snapshot_list(bs->file, psn_info);
2529 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002530}
2531
edison51ef6722010-09-21 19:58:41 -07002532int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2533 const char *snapshot_name)
2534{
2535 BlockDriver *drv = bs->drv;
2536 if (!drv) {
2537 return -ENOMEDIUM;
2538 }
2539 if (!bs->read_only) {
2540 return -EINVAL;
2541 }
2542 if (drv->bdrv_snapshot_load_tmp) {
2543 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2544 }
2545 return -ENOTSUP;
2546}
2547
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes) in a short human-readable
 * form and return @buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and tagged with K, M, G or T: one decimal is shown while
 * the scaled value is below 10, otherwise the value is rounded to an integer.
 * Anything beyond the T range is still expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char unit_chars[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Pick the smallest unit giving fewer than 1000 units (last one caps). */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 unit_chars[idx]);
    } else {
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit,
                 unit_chars[idx]);
    }
    return buf;
}
2577
2578char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2579{
2580 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002581#ifdef _WIN32
2582 struct tm *ptm;
2583#else
bellardfaea38e2006-08-05 21:31:00 +00002584 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002585#endif
bellardfaea38e2006-08-05 21:31:00 +00002586 time_t ti;
2587 int64_t secs;
2588
2589 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002590 snprintf(buf, buf_size,
2591 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002592 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2593 } else {
2594 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002595#ifdef _WIN32
2596 ptm = localtime(&ti);
2597 strftime(date_buf, sizeof(date_buf),
2598 "%Y-%m-%d %H:%M:%S", ptm);
2599#else
bellardfaea38e2006-08-05 21:31:00 +00002600 localtime_r(&ti, &tm);
2601 strftime(date_buf, sizeof(date_buf),
2602 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002603#endif
bellardfaea38e2006-08-05 21:31:00 +00002604 secs = sn->vm_clock_nsec / 1000000000;
2605 snprintf(clock_buf, sizeof(clock_buf),
2606 "%02d:%02d:%02d.%03d",
2607 (int)(secs / 3600),
2608 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002609 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002610 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2611 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002612 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002613 sn->id_str, sn->name,
2614 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2615 date_buf,
2616 clock_buf);
2617 }
2618 return buf;
2619}
2620
bellard83f64092006-08-01 16:21:11 +00002621/**************************************************************/
2622/* async I/Os */
2623
aliguori3b69e4b2009-01-22 16:59:24 +00002624BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002625 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002626 BlockDriverCompletionFunc *cb, void *opaque)
2627{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002628 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2629
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002630 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002631 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002632}
2633
aliguorif141eaf2009-04-07 18:43:24 +00002634BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2635 QEMUIOVector *qiov, int nb_sectors,
2636 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002637{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002638 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2639
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002640 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002641 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002642}
2643
Kevin Wolf40b4f532009-09-09 17:53:37 +02002644
2645typedef struct MultiwriteCB {
2646 int error;
2647 int num_requests;
2648 int num_callbacks;
2649 struct {
2650 BlockDriverCompletionFunc *cb;
2651 void *opaque;
2652 QEMUIOVector *free_qiov;
2653 void *free_buf;
2654 } callbacks[];
2655} MultiwriteCB;
2656
2657static void multiwrite_user_cb(MultiwriteCB *mcb)
2658{
2659 int i;
2660
2661 for (i = 0; i < mcb->num_callbacks; i++) {
2662 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002663 if (mcb->callbacks[i].free_qiov) {
2664 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2665 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002666 g_free(mcb->callbacks[i].free_qiov);
Herve Poussineauf8a83242010-01-24 21:23:56 +00002667 qemu_vfree(mcb->callbacks[i].free_buf);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002668 }
2669}
2670
2671static void multiwrite_cb(void *opaque, int ret)
2672{
2673 MultiwriteCB *mcb = opaque;
2674
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002675 trace_multiwrite_cb(mcb, ret);
2676
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002677 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002678 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002679 }
2680
2681 mcb->num_requests--;
2682 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002683 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002684 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002685 }
2686}
2687
2688static int multiwrite_req_compare(const void *a, const void *b)
2689{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002690 const BlockRequest *req1 = a, *req2 = b;
2691
2692 /*
2693 * Note that we can't simply subtract req2->sector from req1->sector
2694 * here as that could overflow the return value.
2695 */
2696 if (req1->sector > req2->sector) {
2697 return 1;
2698 } else if (req1->sector < req2->sector) {
2699 return -1;
2700 } else {
2701 return 0;
2702 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002703}
2704
2705/*
2706 * Takes a bunch of requests and tries to merge them. Returns the number of
2707 * requests that remain after merging.
2708 */
2709static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2710 int num_reqs, MultiwriteCB *mcb)
2711{
2712 int i, outidx;
2713
2714 // Sort requests by start sector
2715 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2716
2717 // Check if adjacent requests touch the same clusters. If so, combine them,
2718 // filling up gaps with zero sectors.
2719 outidx = 0;
2720 for (i = 1; i < num_reqs; i++) {
2721 int merge = 0;
2722 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2723
2724 // This handles the cases that are valid for all block drivers, namely
2725 // exactly sequential writes and overlapping writes.
2726 if (reqs[i].sector <= oldreq_last) {
2727 merge = 1;
2728 }
2729
2730 // The block driver may decide that it makes sense to combine requests
2731 // even if there is a gap of some sectors between them. In this case,
2732 // the gap is filled with zeros (therefore only applicable for yet
2733 // unused space in format like qcow2).
2734 if (!merge && bs->drv->bdrv_merge_requests) {
2735 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2736 }
2737
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002738 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2739 merge = 0;
2740 }
2741
Kevin Wolf40b4f532009-09-09 17:53:37 +02002742 if (merge) {
2743 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002744 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002745 qemu_iovec_init(qiov,
2746 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2747
2748 // Add the first request to the merged one. If the requests are
2749 // overlapping, drop the last sectors of the first request.
2750 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2751 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2752
2753 // We might need to add some zeros between the two requests
2754 if (reqs[i].sector > oldreq_last) {
2755 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2756 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2757 memset(buf, 0, zero_bytes);
2758 qemu_iovec_add(qiov, buf, zero_bytes);
2759 mcb->callbacks[i].free_buf = buf;
2760 }
2761
2762 // Add the second request
2763 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2764
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002765 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002766 reqs[outidx].qiov = qiov;
2767
2768 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2769 } else {
2770 outidx++;
2771 reqs[outidx].sector = reqs[i].sector;
2772 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2773 reqs[outidx].qiov = reqs[i].qiov;
2774 }
2775 }
2776
2777 return outidx + 1;
2778}
2779
2780/*
2781 * Submit multiple AIO write requests at once.
2782 *
2783 * On success, the function returns 0 and all requests in the reqs array have
2784 * been submitted. In error case this function returns -1, and any of the
2785 * requests may or may not be submitted yet. In particular, this means that the
2786 * callback will be called for some of the requests, for others it won't. The
2787 * caller must check the error field of the BlockRequest to wait for the right
2788 * callbacks (if error != 0, no callback will be called).
2789 *
2790 * The implementation may modify the contents of the reqs array, e.g. to merge
2791 * requests. However, the fields opaque and error are left unmodified as they
2792 * are used to signal failure for a single request to the caller.
2793 */
2794int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2795{
2796 BlockDriverAIOCB *acb;
2797 MultiwriteCB *mcb;
2798 int i;
2799
Ryan Harper301db7c2011-03-07 10:01:04 -06002800 /* don't submit writes if we don't have a medium */
2801 if (bs->drv == NULL) {
2802 for (i = 0; i < num_reqs; i++) {
2803 reqs[i].error = -ENOMEDIUM;
2804 }
2805 return -1;
2806 }
2807
Kevin Wolf40b4f532009-09-09 17:53:37 +02002808 if (num_reqs == 0) {
2809 return 0;
2810 }
2811
2812 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05002813 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002814 mcb->num_requests = 0;
2815 mcb->num_callbacks = num_reqs;
2816
2817 for (i = 0; i < num_reqs; i++) {
2818 mcb->callbacks[i].cb = reqs[i].cb;
2819 mcb->callbacks[i].opaque = reqs[i].opaque;
2820 }
2821
2822 // Check for mergable requests
2823 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2824
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002825 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2826
Kevin Wolf453f9a12010-07-02 14:01:21 +02002827 /*
2828 * Run the aio requests. As soon as one request can't be submitted
2829 * successfully, fail all requests that are not yet submitted (we must
2830 * return failure for all requests anyway)
2831 *
2832 * num_requests cannot be set to the right value immediately: If
2833 * bdrv_aio_writev fails for some request, num_requests would be too high
2834 * and therefore multiwrite_cb() would never recognize the multiwrite
2835 * request as completed. We also cannot use the loop variable i to set it
2836 * when the first request fails because the callback may already have been
2837 * called for previously submitted requests. Thus, num_requests must be
2838 * incremented for each request that is submitted.
2839 *
2840 * The problem that callbacks may be called early also means that we need
2841 * to take care that num_requests doesn't become 0 before all requests are
2842 * submitted - multiwrite_cb() would consider the multiwrite request
2843 * completed. A dummy request that is "completed" by a manual call to
2844 * multiwrite_cb() takes care of this.
2845 */
2846 mcb->num_requests = 1;
2847
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002848 // Run the aio requests
Kevin Wolf40b4f532009-09-09 17:53:37 +02002849 for (i = 0; i < num_reqs; i++) {
Kevin Wolf453f9a12010-07-02 14:01:21 +02002850 mcb->num_requests++;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002851 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2852 reqs[i].nb_sectors, multiwrite_cb, mcb);
2853
2854 if (acb == NULL) {
2855 // We can only fail the whole thing if no request has been
2856 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2857 // complete and report the error in the callback.
Kevin Wolf453f9a12010-07-02 14:01:21 +02002858 if (i == 0) {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002859 trace_bdrv_aio_multiwrite_earlyfail(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002860 goto fail;
2861 } else {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002862 trace_bdrv_aio_multiwrite_latefail(mcb, i);
Kevin Wolf7eb58a62010-04-06 18:24:07 +02002863 multiwrite_cb(mcb, -EIO);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002864 break;
2865 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002866 }
2867 }
2868
Kevin Wolf453f9a12010-07-02 14:01:21 +02002869 /* Complete the dummy request */
2870 multiwrite_cb(mcb, 0);
2871
Kevin Wolf40b4f532009-09-09 17:53:37 +02002872 return 0;
2873
2874fail:
Kevin Wolf453f9a12010-07-02 14:01:21 +02002875 for (i = 0; i < mcb->num_callbacks; i++) {
2876 reqs[i].error = -EIO;
2877 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002878 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002879 return -1;
2880}
2881
bellard83f64092006-08-01 16:21:11 +00002882void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00002883{
aliguori6bbff9a2009-03-20 18:25:59 +00002884 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00002885}
2886
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002887/* block I/O throttling */
2888static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2889 bool is_write, double elapsed_time, uint64_t *wait)
2890{
2891 uint64_t bps_limit = 0;
2892 double bytes_limit, bytes_base, bytes_res;
2893 double slice_time, wait_time;
2894
2895 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2896 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2897 } else if (bs->io_limits.bps[is_write]) {
2898 bps_limit = bs->io_limits.bps[is_write];
2899 } else {
2900 if (wait) {
2901 *wait = 0;
2902 }
2903
2904 return false;
2905 }
2906
2907 slice_time = bs->slice_end - bs->slice_start;
2908 slice_time /= (NANOSECONDS_PER_SECOND);
2909 bytes_limit = bps_limit * slice_time;
2910 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2911 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2912 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2913 }
2914
2915 /* bytes_base: the bytes of data which have been read/written; and
2916 * it is obtained from the history statistic info.
2917 * bytes_res: the remaining bytes of data which need to be read/written.
2918 * (bytes_base + bytes_res) / bps_limit: used to calcuate
2919 * the total time for completing reading/writting all data.
2920 */
2921 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2922
2923 if (bytes_base + bytes_res <= bytes_limit) {
2924 if (wait) {
2925 *wait = 0;
2926 }
2927
2928 return false;
2929 }
2930
2931 /* Calc approx time to dispatch */
2932 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2933
2934 /* When the I/O rate at runtime exceeds the limits,
2935 * bs->slice_end need to be extended in order that the current statistic
2936 * info can be kept until the timer fire, so it is increased and tuned
2937 * based on the result of experiment.
2938 */
2939 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2940 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2941 if (wait) {
2942 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2943 }
2944
2945 return true;
2946}
2947
2948static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2949 double elapsed_time, uint64_t *wait)
2950{
2951 uint64_t iops_limit = 0;
2952 double ios_limit, ios_base;
2953 double slice_time, wait_time;
2954
2955 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2956 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2957 } else if (bs->io_limits.iops[is_write]) {
2958 iops_limit = bs->io_limits.iops[is_write];
2959 } else {
2960 if (wait) {
2961 *wait = 0;
2962 }
2963
2964 return false;
2965 }
2966
2967 slice_time = bs->slice_end - bs->slice_start;
2968 slice_time /= (NANOSECONDS_PER_SECOND);
2969 ios_limit = iops_limit * slice_time;
2970 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2971 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2972 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2973 }
2974
2975 if (ios_base + 1 <= ios_limit) {
2976 if (wait) {
2977 *wait = 0;
2978 }
2979
2980 return false;
2981 }
2982
2983 /* Calc approx time to dispatch */
2984 wait_time = (ios_base + 1) / iops_limit;
2985 if (wait_time > elapsed_time) {
2986 wait_time = wait_time - elapsed_time;
2987 } else {
2988 wait_time = 0;
2989 }
2990
2991 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2992 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2993 if (wait) {
2994 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2995 }
2996
2997 return true;
2998}
2999
3000static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3001 bool is_write, int64_t *wait)
3002{
3003 int64_t now, max_wait;
3004 uint64_t bps_wait = 0, iops_wait = 0;
3005 double elapsed_time;
3006 int bps_ret, iops_ret;
3007
3008 now = qemu_get_clock_ns(vm_clock);
3009 if ((bs->slice_start < now)
3010 && (bs->slice_end > now)) {
3011 bs->slice_end = now + bs->slice_time;
3012 } else {
3013 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3014 bs->slice_start = now;
3015 bs->slice_end = now + bs->slice_time;
3016
3017 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3018 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3019
3020 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3021 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3022 }
3023
3024 elapsed_time = now - bs->slice_start;
3025 elapsed_time /= (NANOSECONDS_PER_SECOND);
3026
3027 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3028 is_write, elapsed_time, &bps_wait);
3029 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3030 elapsed_time, &iops_wait);
3031 if (bps_ret || iops_ret) {
3032 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3033 if (wait) {
3034 *wait = max_wait;
3035 }
3036
3037 now = qemu_get_clock_ns(vm_clock);
3038 if (bs->slice_end < now + max_wait) {
3039 bs->slice_end = now + max_wait;
3040 }
3041
3042 return true;
3043 }
3044
3045 if (wait) {
3046 *wait = 0;
3047 }
3048
3049 return false;
3050}
pbrookce1a14d2006-08-07 02:38:06 +00003051
bellard83f64092006-08-01 16:21:11 +00003052/**************************************************************/
3053/* async block device emulation */
3054
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003055typedef struct BlockDriverAIOCBSync {
3056 BlockDriverAIOCB common;
3057 QEMUBH *bh;
3058 int ret;
3059 /* vector translation state */
3060 QEMUIOVector *qiov;
3061 uint8_t *bounce;
3062 int is_write;
3063} BlockDriverAIOCBSync;
3064
3065static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3066{
Kevin Wolfb666d232010-05-05 11:44:39 +02003067 BlockDriverAIOCBSync *acb =
3068 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003069 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003070 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003071 qemu_aio_release(acb);
3072}
3073
3074static AIOPool bdrv_em_aio_pool = {
3075 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3076 .cancel = bdrv_aio_cancel_em,
3077};
3078
bellard83f64092006-08-01 16:21:11 +00003079static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003080{
pbrookce1a14d2006-08-07 02:38:06 +00003081 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003082
aliguorif141eaf2009-04-07 18:43:24 +00003083 if (!acb->is_write)
3084 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003085 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003086 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003087 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003088 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003089 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003090}
bellardbeac80c2006-06-26 20:08:57 +00003091
aliguorif141eaf2009-04-07 18:43:24 +00003092static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3093 int64_t sector_num,
3094 QEMUIOVector *qiov,
3095 int nb_sectors,
3096 BlockDriverCompletionFunc *cb,
3097 void *opaque,
3098 int is_write)
3099
bellardea2384d2004-08-01 21:59:26 +00003100{
pbrookce1a14d2006-08-07 02:38:06 +00003101 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003102
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003103 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003104 acb->is_write = is_write;
3105 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003106 acb->bounce = qemu_blockalign(bs, qiov->size);
aliguorif141eaf2009-04-07 18:43:24 +00003107
pbrookce1a14d2006-08-07 02:38:06 +00003108 if (!acb->bh)
3109 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003110
3111 if (is_write) {
3112 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003113 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003114 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003115 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003116 }
3117
pbrookce1a14d2006-08-07 02:38:06 +00003118 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003119
pbrookce1a14d2006-08-07 02:38:06 +00003120 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003121}
3122
aliguorif141eaf2009-04-07 18:43:24 +00003123static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3124 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003125 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003126{
aliguorif141eaf2009-04-07 18:43:24 +00003127 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003128}
3129
aliguorif141eaf2009-04-07 18:43:24 +00003130static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3131 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3132 BlockDriverCompletionFunc *cb, void *opaque)
3133{
3134 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3135}
3136
Kevin Wolf68485422011-06-30 10:05:46 +02003137
3138typedef struct BlockDriverAIOCBCoroutine {
3139 BlockDriverAIOCB common;
3140 BlockRequest req;
3141 bool is_write;
3142 QEMUBH* bh;
3143} BlockDriverAIOCBCoroutine;
3144
3145static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3146{
3147 qemu_aio_flush();
3148}
3149
3150static AIOPool bdrv_em_co_aio_pool = {
3151 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3152 .cancel = bdrv_aio_co_cancel_em,
3153};
3154
Paolo Bonzini35246a62011-10-14 10:41:29 +02003155static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003156{
3157 BlockDriverAIOCBCoroutine *acb = opaque;
3158
3159 acb->common.cb(acb->common.opaque, acb->req.error);
3160 qemu_bh_delete(acb->bh);
3161 qemu_aio_release(acb);
3162}
3163
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003164/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3165static void coroutine_fn bdrv_co_do_rw(void *opaque)
3166{
3167 BlockDriverAIOCBCoroutine *acb = opaque;
3168 BlockDriverState *bs = acb->common.bs;
3169
3170 if (!acb->is_write) {
3171 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3172 acb->req.nb_sectors, acb->req.qiov);
3173 } else {
3174 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3175 acb->req.nb_sectors, acb->req.qiov);
3176 }
3177
Paolo Bonzini35246a62011-10-14 10:41:29 +02003178 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003179 qemu_bh_schedule(acb->bh);
3180}
3181
Kevin Wolf68485422011-06-30 10:05:46 +02003182static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3183 int64_t sector_num,
3184 QEMUIOVector *qiov,
3185 int nb_sectors,
3186 BlockDriverCompletionFunc *cb,
3187 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003188 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003189{
3190 Coroutine *co;
3191 BlockDriverAIOCBCoroutine *acb;
3192
3193 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3194 acb->req.sector = sector_num;
3195 acb->req.nb_sectors = nb_sectors;
3196 acb->req.qiov = qiov;
3197 acb->is_write = is_write;
3198
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003199 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003200 qemu_coroutine_enter(co, acb);
3201
3202 return &acb->common;
3203}
3204
Paolo Bonzini07f07612011-10-17 12:32:12 +02003205static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003206{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003207 BlockDriverAIOCBCoroutine *acb = opaque;
3208 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003209
Paolo Bonzini07f07612011-10-17 12:32:12 +02003210 acb->req.error = bdrv_co_flush(bs);
3211 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003212 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003213}
3214
Paolo Bonzini07f07612011-10-17 12:32:12 +02003215BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003216 BlockDriverCompletionFunc *cb, void *opaque)
3217{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003218 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003219
Paolo Bonzini07f07612011-10-17 12:32:12 +02003220 Coroutine *co;
3221 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003222
Paolo Bonzini07f07612011-10-17 12:32:12 +02003223 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3224 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3225 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003226
Alexander Graf016f5cf2010-05-26 17:51:49 +02003227 return &acb->common;
3228}
3229
Paolo Bonzini4265d622011-10-17 12:32:14 +02003230static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3231{
3232 BlockDriverAIOCBCoroutine *acb = opaque;
3233 BlockDriverState *bs = acb->common.bs;
3234
3235 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3236 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3237 qemu_bh_schedule(acb->bh);
3238}
3239
3240BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3241 int64_t sector_num, int nb_sectors,
3242 BlockDriverCompletionFunc *cb, void *opaque)
3243{
3244 Coroutine *co;
3245 BlockDriverAIOCBCoroutine *acb;
3246
3247 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3248
3249 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3250 acb->req.sector = sector_num;
3251 acb->req.nb_sectors = nb_sectors;
3252 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3253 qemu_coroutine_enter(co, acb);
3254
3255 return &acb->common;
3256}
3257
bellardea2384d2004-08-01 21:59:26 +00003258void bdrv_init(void)
3259{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003260 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003261}
pbrookce1a14d2006-08-07 02:38:06 +00003262
Markus Armbrustereb852012009-10-27 18:41:44 +01003263void bdrv_init_with_whitelist(void)
3264{
3265 use_bdrv_whitelist = 1;
3266 bdrv_init();
3267}
3268
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003269void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3270 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003271{
pbrookce1a14d2006-08-07 02:38:06 +00003272 BlockDriverAIOCB *acb;
3273
aliguori6bbff9a2009-03-20 18:25:59 +00003274 if (pool->free_aiocb) {
3275 acb = pool->free_aiocb;
3276 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003277 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003278 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003279 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003280 }
3281 acb->bs = bs;
3282 acb->cb = cb;
3283 acb->opaque = opaque;
3284 return acb;
3285}
3286
3287void qemu_aio_release(void *p)
3288{
aliguori6bbff9a2009-03-20 18:25:59 +00003289 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3290 AIOPool *pool = acb->pool;
3291 acb->next = pool->free_aiocb;
3292 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003293}
bellard19cb3732006-08-19 11:45:59 +00003294
/**************************************************************/
/* Coroutine block device emulation */
3297
3298typedef struct CoroutineIOCompletion {
3299 Coroutine *coroutine;
3300 int ret;
3301} CoroutineIOCompletion;
3302
3303static void bdrv_co_io_em_complete(void *opaque, int ret)
3304{
3305 CoroutineIOCompletion *co = opaque;
3306
3307 co->ret = ret;
3308 qemu_coroutine_enter(co->coroutine, NULL);
3309}
3310
3311static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3312 int nb_sectors, QEMUIOVector *iov,
3313 bool is_write)
3314{
3315 CoroutineIOCompletion co = {
3316 .coroutine = qemu_coroutine_self(),
3317 };
3318 BlockDriverAIOCB *acb;
3319
3320 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003321 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3322 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003323 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003324 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3325 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003326 }
3327
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003328 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003329 if (!acb) {
3330 return -EIO;
3331 }
3332 qemu_coroutine_yield();
3333
3334 return co.ret;
3335}
3336
3337static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3338 int64_t sector_num, int nb_sectors,
3339 QEMUIOVector *iov)
3340{
3341 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3342}
3343
3344static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3345 int64_t sector_num, int nb_sectors,
3346 QEMUIOVector *iov)
3347{
3348 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3349}
3350
Paolo Bonzini07f07612011-10-17 12:32:12 +02003351static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003352{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003353 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003354
Paolo Bonzini07f07612011-10-17 12:32:12 +02003355 rwco->ret = bdrv_co_flush(rwco->bs);
3356}
3357
3358int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3359{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003360 int ret;
3361
Kevin Wolfca716362011-11-10 18:13:59 +01003362 if (!bs->drv) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003363 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003364 }
3365
Kevin Wolfca716362011-11-10 18:13:59 +01003366 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003367 if (bs->drv->bdrv_co_flush_to_os) {
3368 ret = bs->drv->bdrv_co_flush_to_os(bs);
3369 if (ret < 0) {
3370 return ret;
3371 }
3372 }
3373
Kevin Wolfca716362011-11-10 18:13:59 +01003374 /* But don't actually force it to the disk with cache=unsafe */
3375 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3376 return 0;
3377 }
3378
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003379 if (bs->drv->bdrv_co_flush_to_disk) {
Kevin Wolfc68b89a2011-11-10 17:25:44 +01003380 return bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003381 } else if (bs->drv->bdrv_aio_flush) {
3382 BlockDriverAIOCB *acb;
3383 CoroutineIOCompletion co = {
3384 .coroutine = qemu_coroutine_self(),
3385 };
3386
3387 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3388 if (acb == NULL) {
3389 return -EIO;
3390 } else {
3391 qemu_coroutine_yield();
3392 return co.ret;
3393 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003394 } else {
3395 /*
3396 * Some block drivers always operate in either writethrough or unsafe
3397 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3398 * know how the server works (because the behaviour is hardcoded or
3399 * depends on server-side configuration), so we can't ensure that
3400 * everything is safe on disk. Returning an error doesn't work because
3401 * that would break guests even if the server operates in writethrough
3402 * mode.
3403 *
3404 * Let's hope the user knows what he's doing.
3405 */
3406 return 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003407 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003408}
3409
Anthony Liguori0f154232011-11-14 15:09:45 -06003410void bdrv_invalidate_cache(BlockDriverState *bs)
3411{
3412 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3413 bs->drv->bdrv_invalidate_cache(bs);
3414 }
3415}
3416
3417void bdrv_invalidate_cache_all(void)
3418{
3419 BlockDriverState *bs;
3420
3421 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3422 bdrv_invalidate_cache(bs);
3423 }
3424}
3425
Paolo Bonzini07f07612011-10-17 12:32:12 +02003426int bdrv_flush(BlockDriverState *bs)
3427{
3428 Coroutine *co;
3429 RwCo rwco = {
3430 .bs = bs,
3431 .ret = NOT_DONE,
3432 };
3433
3434 if (qemu_in_coroutine()) {
3435 /* Fast-path if already in coroutine context */
3436 bdrv_flush_co_entry(&rwco);
3437 } else {
3438 co = qemu_coroutine_create(bdrv_flush_co_entry);
3439 qemu_coroutine_enter(co, &rwco);
3440 while (rwco.ret == NOT_DONE) {
3441 qemu_aio_wait();
3442 }
3443 }
3444
3445 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003446}
3447
Paolo Bonzini4265d622011-10-17 12:32:14 +02003448static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3449{
3450 RwCo *rwco = opaque;
3451
3452 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3453}
3454
3455int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3456 int nb_sectors)
3457{
3458 if (!bs->drv) {
3459 return -ENOMEDIUM;
3460 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3461 return -EIO;
3462 } else if (bs->read_only) {
3463 return -EROFS;
3464 } else if (bs->drv->bdrv_co_discard) {
3465 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3466 } else if (bs->drv->bdrv_aio_discard) {
3467 BlockDriverAIOCB *acb;
3468 CoroutineIOCompletion co = {
3469 .coroutine = qemu_coroutine_self(),
3470 };
3471
3472 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3473 bdrv_co_io_em_complete, &co);
3474 if (acb == NULL) {
3475 return -EIO;
3476 } else {
3477 qemu_coroutine_yield();
3478 return co.ret;
3479 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003480 } else {
3481 return 0;
3482 }
3483}
3484
3485int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3486{
3487 Coroutine *co;
3488 RwCo rwco = {
3489 .bs = bs,
3490 .sector_num = sector_num,
3491 .nb_sectors = nb_sectors,
3492 .ret = NOT_DONE,
3493 };
3494
3495 if (qemu_in_coroutine()) {
3496 /* Fast-path if already in coroutine context */
3497 bdrv_discard_co_entry(&rwco);
3498 } else {
3499 co = qemu_coroutine_create(bdrv_discard_co_entry);
3500 qemu_coroutine_enter(co, &rwco);
3501 while (rwco.ret == NOT_DONE) {
3502 qemu_aio_wait();
3503 }
3504 }
3505
3506 return rwco.ret;
3507}
3508
/**************************************************************/
/* removable device support */
3511
3512/**
3513 * Return TRUE if the media is present
3514 */
3515int bdrv_is_inserted(BlockDriverState *bs)
3516{
3517 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003518
bellard19cb3732006-08-19 11:45:59 +00003519 if (!drv)
3520 return 0;
3521 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003522 return 1;
3523 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003524}
3525
3526/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003527 * Return whether the media changed since the last call to this
3528 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003529 */
3530int bdrv_media_changed(BlockDriverState *bs)
3531{
3532 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003533
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003534 if (drv && drv->bdrv_media_changed) {
3535 return drv->bdrv_media_changed(bs);
3536 }
3537 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003538}
3539
3540/**
3541 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3542 */
Markus Armbrusterfdec4402011-09-06 18:58:45 +02003543void bdrv_eject(BlockDriverState *bs, int eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003544{
3545 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003546
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003547 if (drv && drv->bdrv_eject) {
3548 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003549 }
bellard19cb3732006-08-19 11:45:59 +00003550}
3551
bellard19cb3732006-08-19 11:45:59 +00003552/**
3553 * Lock or unlock the media (if it is locked, the user won't be able
3554 * to eject it manually).
3555 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003556void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003557{
3558 BlockDriver *drv = bs->drv;
3559
Markus Armbruster025e8492011-09-06 18:58:47 +02003560 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003561
Markus Armbruster025e8492011-09-06 18:58:47 +02003562 if (drv && drv->bdrv_lock_medium) {
3563 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003564 }
3565}
ths985a03b2007-12-24 16:10:43 +00003566
/* needed for generic scsi interface */
3568
3569int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3570{
3571 BlockDriver *drv = bs->drv;
3572
3573 if (drv && drv->bdrv_ioctl)
3574 return drv->bdrv_ioctl(bs, req, buf);
3575 return -ENOTSUP;
3576}
aliguori7d780662009-03-12 19:57:08 +00003577
aliguori221f7152009-03-28 17:28:41 +00003578BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3579 unsigned long int req, void *buf,
3580 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003581{
aliguori221f7152009-03-28 17:28:41 +00003582 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003583
aliguori221f7152009-03-28 17:28:41 +00003584 if (drv && drv->bdrv_aio_ioctl)
3585 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3586 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003587}
aliguorie268ca52009-04-22 20:20:00 +00003588
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003589void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3590{
3591 bs->buffer_alignment = align;
3592}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003593
aliguorie268ca52009-04-22 20:20:00 +00003594void *qemu_blockalign(BlockDriverState *bs, size_t size)
3595{
3596 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3597}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003598
3599void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3600{
3601 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003602
Liran Schouraaa0eb72010-01-26 10:31:48 +02003603 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003604 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003605 if (!bs->dirty_bitmap) {
3606 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3607 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3608 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003609
Anthony Liguori7267c092011-08-20 22:09:37 -05003610 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003611 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003612 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003613 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003614 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003615 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003616 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003617 }
3618}
3619
3620int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3621{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003622 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003623
Jan Kiszkac6d22832009-11-30 18:21:20 +01003624 if (bs->dirty_bitmap &&
3625 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003626 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3627 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003628 } else {
3629 return 0;
3630 }
3631}
3632
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003633void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3634 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003635{
3636 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3637}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003638
3639int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3640{
3641 return bs->dirty_count;
3642}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003643
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003644void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3645{
3646 assert(bs->in_use != in_use);
3647 bs->in_use = in_use;
3648}
3649
3650int bdrv_in_use(BlockDriverState *bs)
3651{
3652 return bs->in_use;
3653}
3654
Luiz Capitulino28a72822011-09-26 17:43:50 -03003655void bdrv_iostatus_enable(BlockDriverState *bs)
3656{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003657 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003658 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003659}
3660
3661/* The I/O status is only enabled if the drive explicitly
3662 * enables it _and_ the VM is configured to stop on errors */
3663bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3664{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003665 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003666 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3667 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3668 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3669}
3670
3671void bdrv_iostatus_disable(BlockDriverState *bs)
3672{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003673 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003674}
3675
3676void bdrv_iostatus_reset(BlockDriverState *bs)
3677{
3678 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003679 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003680 }
3681}
3682
3683/* XXX: Today this is set by device models because it makes the implementation
3684 quite simple. However, the block layer knows about the error, so it's
3685 possible to implement this without device models being involved */
3686void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3687{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003688 if (bdrv_iostatus_is_enabled(bs) &&
3689 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003690 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003691 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3692 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003693 }
3694}
3695
Christoph Hellwiga597e792011-08-25 08:26:01 +02003696void
3697bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3698 enum BlockAcctType type)
3699{
3700 assert(type < BDRV_MAX_IOTYPE);
3701
3702 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003703 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003704 cookie->type = type;
3705}
3706
3707void
3708bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3709{
3710 assert(cookie->type < BDRV_MAX_IOTYPE);
3711
3712 bs->nr_bytes[cookie->type] += cookie->bytes;
3713 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003714 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003715}
3716
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003717int bdrv_img_create(const char *filename, const char *fmt,
3718 const char *base_filename, const char *base_fmt,
3719 char *options, uint64_t img_size, int flags)
3720{
3721 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003722 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003723 BlockDriverState *bs = NULL;
3724 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003725 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003726 int ret = 0;
3727
3728 /* Find driver and parse its options */
3729 drv = bdrv_find_format(fmt);
3730 if (!drv) {
3731 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003732 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003733 goto out;
3734 }
3735
3736 proto_drv = bdrv_find_protocol(filename);
3737 if (!proto_drv) {
3738 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003739 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003740 goto out;
3741 }
3742
3743 create_options = append_option_parameters(create_options,
3744 drv->create_options);
3745 create_options = append_option_parameters(create_options,
3746 proto_drv->create_options);
3747
3748 /* Create parameter list with default values */
3749 param = parse_option_parameters("", create_options, param);
3750
3751 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3752
3753 /* Parse -o options */
3754 if (options) {
3755 param = parse_option_parameters(options, create_options, param);
3756 if (param == NULL) {
3757 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003758 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003759 goto out;
3760 }
3761 }
3762
3763 if (base_filename) {
3764 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3765 base_filename)) {
3766 error_report("Backing file not supported for file format '%s'",
3767 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003768 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003769 goto out;
3770 }
3771 }
3772
3773 if (base_fmt) {
3774 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3775 error_report("Backing file format not supported for file "
3776 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003777 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003778 goto out;
3779 }
3780 }
3781
Jes Sorensen792da932010-12-16 13:52:17 +01003782 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3783 if (backing_file && backing_file->value.s) {
3784 if (!strcmp(filename, backing_file->value.s)) {
3785 error_report("Error: Trying to create an image with the "
3786 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003787 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003788 goto out;
3789 }
3790 }
3791
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003792 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3793 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003794 backing_drv = bdrv_find_format(backing_fmt->value.s);
3795 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003796 error_report("Unknown backing file format '%s'",
3797 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003798 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003799 goto out;
3800 }
3801 }
3802
3803 // The size for the image must always be specified, with one exception:
3804 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02003805 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3806 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003807 if (backing_file && backing_file->value.s) {
3808 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003809 char buf[32];
3810
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003811 bs = bdrv_new("");
3812
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003813 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003814 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003815 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003816 goto out;
3817 }
3818 bdrv_get_geometry(bs, &size);
3819 size *= 512;
3820
3821 snprintf(buf, sizeof(buf), "%" PRId64, size);
3822 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3823 } else {
3824 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003825 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003826 goto out;
3827 }
3828 }
3829
3830 printf("Formatting '%s', fmt=%s ", filename, fmt);
3831 print_option_parameters(param);
3832 puts("");
3833
3834 ret = bdrv_create(drv, filename, param);
3835
3836 if (ret < 0) {
3837 if (ret == -ENOTSUP) {
3838 error_report("Formatting or formatting option not supported for "
3839 "file format '%s'", fmt);
3840 } else if (ret == -EFBIG) {
3841 error_report("The image size is too large for file format '%s'",
3842 fmt);
3843 } else {
3844 error_report("%s: error while creating %s: %s", filename, fmt,
3845 strerror(-ret));
3846 }
3847 }
3848
3849out:
3850 free_option_parameters(create_options);
3851 free_option_parameters(param);
3852
3853 if (bs) {
3854 bdrv_delete(bs);
3855 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01003856
3857 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003858}