blob: 434c13d82a55ce5c48869b50f34de90a15c63667 [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010049#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020051static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000052static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
53 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000054 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000055static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
56 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000057 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020058static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
59 int64_t sector_num, int nb_sectors,
60 QEMUIOVector *iov);
61static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
62 int64_t sector_num, int nb_sectors,
63 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010064static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010066static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010068static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
69 int64_t sector_num,
70 QEMUIOVector *qiov,
71 int nb_sectors,
72 BlockDriverCompletionFunc *cb,
73 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010074 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000076
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080077static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
78 bool is_write, double elapsed_time, uint64_t *wait);
79static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
80 double elapsed_time, uint64_t *wait);
81static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
82 bool is_write, int64_t *wait);
83
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010084static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
85 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000086
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010087static QLIST_HEAD(, BlockDriver) bdrv_drivers =
88 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000089
Markus Armbrusterf9092b12010-06-25 10:33:39 +020090/* The device to use for VM snapshots */
91static BlockDriverState *bs_snapshots;
92
Markus Armbrustereb852012009-10-27 18:41:44 +010093/* If non-zero, use only whitelisted block drivers */
94static int use_bdrv_whitelist;
95
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +000096#ifdef _WIN32
/*
 * Return non-zero when filename starts with an ASCII letter immediately
 * followed by ':' (e.g. "c:" or "D:\dir"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    /* Bail out first so filename[1] is never read for a non-letter,
     * including the terminating NUL of an empty string. */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
103
104int is_windows_drive(const char *filename)
105{
106 if (is_windows_drive_prefix(filename) &&
107 filename[2] == '\0')
108 return 1;
109 if (strstart(filename, "\\\\.\\", NULL) ||
110 strstart(filename, "//./", NULL))
111 return 1;
112 return 0;
113}
114#endif
115
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800116/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800117void bdrv_io_limits_disable(BlockDriverState *bs)
118{
119 bs->io_limits_enabled = false;
120
121 while (qemu_co_queue_next(&bs->throttled_reqs));
122
123 if (bs->block_timer) {
124 qemu_del_timer(bs->block_timer);
125 qemu_free_timer(bs->block_timer);
126 bs->block_timer = NULL;
127 }
128
129 bs->slice_start = 0;
130 bs->slice_end = 0;
131 bs->slice_time = 0;
132 memset(&bs->io_base, 0, sizeof(bs->io_base));
133}
134
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800135static void bdrv_block_timer(void *opaque)
136{
137 BlockDriverState *bs = opaque;
138
139 qemu_co_queue_next(&bs->throttled_reqs);
140}
141
142void bdrv_io_limits_enable(BlockDriverState *bs)
143{
144 qemu_co_queue_init(&bs->throttled_reqs);
145 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
146 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
147 bs->slice_start = qemu_get_clock_ns(vm_clock);
148 bs->slice_end = bs->slice_start + bs->slice_time;
149 memset(&bs->io_base, 0, sizeof(bs->io_base));
150 bs->io_limits_enabled = true;
151}
152
153bool bdrv_io_limits_enabled(BlockDriverState *bs)
154{
155 BlockIOLimit *io_limits = &bs->io_limits;
156 return io_limits->bps[BLOCK_IO_LIMIT_READ]
157 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
158 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
159 || io_limits->iops[BLOCK_IO_LIMIT_READ]
160 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
161 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
162}
163
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800164static void bdrv_io_limits_intercept(BlockDriverState *bs,
165 bool is_write, int nb_sectors)
166{
167 int64_t wait_time = -1;
168
169 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
170 qemu_co_queue_wait(&bs->throttled_reqs);
171 }
172
173 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
174 * throttled requests will not be dequeued until the current request is
175 * allowed to be serviced. So if the current request still exceeds the
176 * limits, it will be inserted to the head. All requests followed it will
177 * be still in throttled_reqs queue.
178 */
179
180 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
181 qemu_mod_timer(bs->block_timer,
182 wait_time + qemu_get_clock_ns(vm_clock));
183 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
184 }
185
186 qemu_co_queue_next(&bs->throttled_reqs);
187}
188
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000189/* check if the path starts with "<protocol>:" */
/*
 * Return 1 if path contains a ':' that is taken as a "<protocol>:" prefix,
 * 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* Drive letters and "\\.\"-style names are never treated as protocols */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') ? 1 : 0;
}
201
/*
 * Return non-zero if path is absolute. The part up to and including the
 * first ':' (if any) is skipped before looking for a leading separator.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
221
/* If filename is absolute, just copy it to dest. Otherwise, build a
   path to it by treating it as relative to base_path. URLs are
   supported. */
/*
 * Write into dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) either filename itself, if it is absolute, or the
 * directory/protocol prefix of base_path followed by filename.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon;
    const char *after_sep;
    const char *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' of base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last directory separator of base_path */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* The prefix to keep ends at whichever of the two lies further right */
    prefix_end = (after_sep > after_colon) ? after_sep : after_colon;

    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
265
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500266void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000267{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100268 /* Block drivers without coroutine functions need emulation */
269 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200270 bdrv->bdrv_co_readv = bdrv_co_readv_em;
271 bdrv->bdrv_co_writev = bdrv_co_writev_em;
272
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100273 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
274 * the block driver lacks aio we need to emulate that too.
275 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200276 if (!bdrv->bdrv_aio_readv) {
277 /* add AIO emulation layer */
278 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
279 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200280 }
bellard83f64092006-08-01 16:21:11 +0000281 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200282
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100283 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000284}
bellardb3380822004-03-14 21:38:54 +0000285
286/* create a new block device (by default it is empty) */
287BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000288{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100289 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000290
Anthony Liguori7267c092011-08-20 22:09:37 -0500291 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000292 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000293 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100294 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000295 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300296 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000297 return bs;
298}
299
bellardea2384d2004-08-01 21:59:26 +0000300BlockDriver *bdrv_find_format(const char *format_name)
301{
302 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100303 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
304 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000305 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100306 }
bellardea2384d2004-08-01 21:59:26 +0000307 }
308 return NULL;
309}
310
Markus Armbrustereb852012009-10-27 18:41:44 +0100311static int bdrv_is_whitelisted(BlockDriver *drv)
312{
313 static const char *whitelist[] = {
314 CONFIG_BDRV_WHITELIST
315 };
316 const char **p;
317
318 if (!whitelist[0])
319 return 1; /* no whitelist, anything goes */
320
321 for (p = whitelist; *p; p++) {
322 if (!strcmp(drv->format_name, *p)) {
323 return 1;
324 }
325 }
326 return 0;
327}
328
329BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
330{
331 BlockDriver *drv = bdrv_find_format(format_name);
332 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
333}
334
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200335int bdrv_create(BlockDriver *drv, const char* filename,
336 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000337{
338 if (!drv->bdrv_create)
339 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200340
341 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000342}
343
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200344int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
345{
346 BlockDriver *drv;
347
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900348 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200349 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000350 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351 }
352
353 return bdrv_create(drv, filename, options);
354}
355
bellardd5249392004-08-03 21:14:23 +0000356#ifdef _WIN32
bellard95389c82005-12-18 18:28:15 +0000357void get_tmp_filename(char *filename, int size)
bellardd5249392004-08-03 21:14:23 +0000358{
bellard3b9f94e2007-01-07 17:27:07 +0000359 char temp_dir[MAX_PATH];
ths3b46e622007-09-17 08:09:54 +0000360
bellard3b9f94e2007-01-07 17:27:07 +0000361 GetTempPath(MAX_PATH, temp_dir);
362 GetTempFileName(temp_dir, "qem", 0, filename);
bellardd5249392004-08-03 21:14:23 +0000363}
364#else
/*
 * Create a temporary file below $TMPDIR (or /tmp when TMPDIR is unset) and
 * store its path in filename (at most size bytes).
 *
 * The file is created by mkstemp() and closed again right away; only the
 * name is handed back. If mkstemp() fails there is no descriptor to close,
 * and filename keeps the "vl.XXXXXX" template that was passed to it.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir = getenv("TMPDIR");
    int fd;

    if (tmpdir == NULL) {
        tmpdir = "/tmp";
    }

    /* XXX: race condition possible */
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd >= 0) {
        /* Do not call close(-1): it would replace mkstemp's errno */
        close(fd);
    }
}
bellardd5249392004-08-03 21:14:23 +0000377#endif
bellardea2384d2004-08-01 21:59:26 +0000378
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200379/*
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
382 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200383static BlockDriver *find_hdev_driver(const char *filename)
384{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200385 int score_max = 0, score;
386 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200387
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100388 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200389 if (d->bdrv_probe_device) {
390 score = d->bdrv_probe_device(filename);
391 if (score > score_max) {
392 score_max = score;
393 drv = d;
394 }
395 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200396 }
397
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200398 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200399}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200400
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900401BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200402{
403 BlockDriver *drv1;
404 char protocol[128];
405 int len;
406 const char *p;
407
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200408 /* TODO Drivers without bdrv_file_open must be specified explicitly */
409
Christoph Hellwig39508e72010-06-23 12:25:17 +0200410 /*
411 * XXX(hch): we really should not let host device detection
412 * override an explicit protocol specification, but moving this
413 * later breaks access to device names with colons in them.
414 * Thanks to the brain-dead persistent naming schemes on udev-
415 * based Linux systems those actually are quite common.
416 */
417 drv1 = find_hdev_driver(filename);
418 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200419 return drv1;
420 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200421
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000422 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200423 return bdrv_find_format("file");
424 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000425 p = strchr(filename, ':');
426 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200427 len = p - filename;
428 if (len > sizeof(protocol) - 1)
429 len = sizeof(protocol) - 1;
430 memcpy(protocol, filename, len);
431 protocol[len] = '\0';
432 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
433 if (drv1->protocol_name &&
434 !strcmp(drv1->protocol_name, protocol)) {
435 return drv1;
436 }
437 }
438 return NULL;
439}
440
Stefan Weilc98ac352010-07-21 21:51:51 +0200441static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000442{
bellard83f64092006-08-01 16:21:11 +0000443 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000444 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000445 uint8_t buf[2048];
446 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000447
Naphtali Spreif5edb012010-01-17 16:48:13 +0200448 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200449 if (ret < 0) {
450 *pdrv = NULL;
451 return ret;
452 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700453
Kevin Wolf08a00552010-06-01 18:37:31 +0200454 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
455 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700456 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200457 drv = bdrv_find_format("raw");
458 if (!drv) {
459 ret = -ENOENT;
460 }
461 *pdrv = drv;
462 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700464
bellard83f64092006-08-01 16:21:11 +0000465 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
466 bdrv_delete(bs);
467 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200468 *pdrv = NULL;
469 return ret;
bellard83f64092006-08-01 16:21:11 +0000470 }
471
bellardea2384d2004-08-01 21:59:26 +0000472 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200473 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100474 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000475 if (drv1->bdrv_probe) {
476 score = drv1->bdrv_probe(buf, ret, filename);
477 if (score > score_max) {
478 score_max = score;
479 drv = drv1;
480 }
bellardea2384d2004-08-01 21:59:26 +0000481 }
482 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200483 if (!drv) {
484 ret = -ENOENT;
485 }
486 *pdrv = drv;
487 return ret;
bellardea2384d2004-08-01 21:59:26 +0000488}
489
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100490/**
491 * Set the current 'total_sectors' value
492 */
493static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
494{
495 BlockDriver *drv = bs->drv;
496
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700497 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
498 if (bs->sg)
499 return 0;
500
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100501 /* query actual device if possible, otherwise just trust the hint */
502 if (drv->bdrv_getlength) {
503 int64_t length = drv->bdrv_getlength(bs);
504 if (length < 0) {
505 return length;
506 }
507 hint = length >> BDRV_SECTOR_BITS;
508 }
509
510 bs->total_sectors = hint;
511 return 0;
512}
513
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100514/**
515 * Set open flags for a given cache mode
516 *
517 * Return 0 on success, -1 if the cache mode was invalid.
518 */
519int bdrv_parse_cache_flags(const char *mode, int *flags)
520{
521 *flags &= ~BDRV_O_CACHE_MASK;
522
523 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
524 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100525 } else if (!strcmp(mode, "directsync")) {
526 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100527 } else if (!strcmp(mode, "writeback")) {
528 *flags |= BDRV_O_CACHE_WB;
529 } else if (!strcmp(mode, "unsafe")) {
530 *flags |= BDRV_O_CACHE_WB;
531 *flags |= BDRV_O_NO_FLUSH;
532 } else if (!strcmp(mode, "writethrough")) {
533 /* this is the default */
534 } else {
535 return -1;
536 }
537
538 return 0;
539}
540
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000541/**
542 * The copy-on-read flag is actually a reference count so multiple users may
543 * use the feature without worrying about clobbering its previous state.
544 * Copy-on-read stays enabled until all users have called to disable it.
545 */
546void bdrv_enable_copy_on_read(BlockDriverState *bs)
547{
548 bs->copy_on_read++;
549}
550
551void bdrv_disable_copy_on_read(BlockDriverState *bs)
552{
553 assert(bs->copy_on_read > 0);
554 bs->copy_on_read--;
555}
556
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200557/*
Kevin Wolf57915332010-04-14 15:24:50 +0200558 * Common part for opening disk images and files
559 */
560static int bdrv_open_common(BlockDriverState *bs, const char *filename,
561 int flags, BlockDriver *drv)
562{
563 int ret, open_flags;
564
565 assert(drv != NULL);
566
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100567 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
568
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200569 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100570 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200571 bs->encrypted = 0;
572 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100573 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200574 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100575 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200576 bs->buffer_alignment = 512;
577
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000578 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
579 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
580 bdrv_enable_copy_on_read(bs);
581 }
582
Kevin Wolf57915332010-04-14 15:24:50 +0200583 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100584 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200585
586 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
587 return -ENOTSUP;
588 }
589
590 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500591 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200592
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100593 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200594
595 /*
596 * Clear flags that are internal to the block layer before opening the
597 * image.
598 */
599 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
600
601 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200602 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200603 */
604 if (bs->is_temporary) {
605 open_flags |= BDRV_O_RDWR;
606 }
607
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100608 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
609
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200610 /* Open the image, either directly or using a protocol */
611 if (drv->bdrv_file_open) {
612 ret = drv->bdrv_file_open(bs, filename, open_flags);
613 } else {
614 ret = bdrv_file_open(&bs->file, filename, open_flags);
615 if (ret >= 0) {
616 ret = drv->bdrv_open(bs, open_flags);
617 }
618 }
619
Kevin Wolf57915332010-04-14 15:24:50 +0200620 if (ret < 0) {
621 goto free_and_fail;
622 }
623
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100624 ret = refresh_total_sectors(bs, bs->total_sectors);
625 if (ret < 0) {
626 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200627 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100628
Kevin Wolf57915332010-04-14 15:24:50 +0200629#ifndef _WIN32
630 if (bs->is_temporary) {
631 unlink(filename);
632 }
633#endif
634 return 0;
635
636free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200637 if (bs->file) {
638 bdrv_delete(bs->file);
639 bs->file = NULL;
640 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500641 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200642 bs->opaque = NULL;
643 bs->drv = NULL;
644 return ret;
645}
646
647/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200648 * Opens a file using a protocol (file, host_device, nbd, ...)
649 */
bellard83f64092006-08-01 16:21:11 +0000650int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000651{
bellard83f64092006-08-01 16:21:11 +0000652 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200653 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000654 int ret;
655
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900656 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200657 if (!drv) {
658 return -ENOENT;
659 }
660
bellard83f64092006-08-01 16:21:11 +0000661 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200662 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000663 if (ret < 0) {
664 bdrv_delete(bs);
665 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000666 }
aliguori71d07702009-03-03 17:37:16 +0000667 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000668 *pbs = bs;
669 return 0;
bellardea2384d2004-08-01 21:59:26 +0000670}
bellardfc01f7e2003-06-30 10:03:06 +0000671
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200672/*
673 * Opens a disk image (raw, qcow2, vmdk, ...)
674 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200675int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
676 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000677{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200678 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200679 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000680
bellard83f64092006-08-01 16:21:11 +0000681 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000682 BlockDriverState *bs1;
683 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000684 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200685 BlockDriver *bdrv_qcow2;
686 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200687 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000688
bellardea2384d2004-08-01 21:59:26 +0000689 /* if snapshot, we create a temporary backing file and open it
690 instead of opening 'filename' directly */
691
692 /* if there is a backing file, use it */
693 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200694 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000695 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000696 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000697 return ret;
bellardea2384d2004-08-01 21:59:26 +0000698 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200699 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000700
701 if (bs1->drv && bs1->drv->protocol_name)
702 is_protocol = 1;
703
bellardea2384d2004-08-01 21:59:26 +0000704 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000705
bellardea2384d2004-08-01 21:59:26 +0000706 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000707
708 /* Real path is meaningless for protocols */
709 if (is_protocol)
710 snprintf(backing_filename, sizeof(backing_filename),
711 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000712 else if (!realpath(filename, backing_filename))
713 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000714
Kevin Wolf91a073a2009-05-27 14:48:06 +0200715 bdrv_qcow2 = bdrv_find_format("qcow2");
716 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
717
Jes Sorensen3e829902010-05-27 16:20:30 +0200718 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200719 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
720 if (drv) {
721 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
722 drv->format_name);
723 }
724
725 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200726 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000727 if (ret < 0) {
728 return ret;
bellardea2384d2004-08-01 21:59:26 +0000729 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200730
bellardea2384d2004-08-01 21:59:26 +0000731 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200732 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000733 bs->is_temporary = 1;
734 }
bellard712e7872005-04-28 21:09:32 +0000735
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200736 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200737 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200738 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000739 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100740
aliguori51d7c002009-03-05 23:00:29 +0000741 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000742 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000743 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200744
745 /* Open the image */
746 ret = bdrv_open_common(bs, filename, flags, drv);
747 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100748 goto unlink_and_fail;
749 }
750
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751 /* If there is a backing file, use it */
752 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
753 char backing_filename[PATH_MAX];
754 int back_flags;
755 BlockDriver *back_drv = NULL;
756
757 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000758
759 if (path_has_protocol(bs->backing_file)) {
760 pstrcpy(backing_filename, sizeof(backing_filename),
761 bs->backing_file);
762 } else {
763 path_combine(backing_filename, sizeof(backing_filename),
764 filename, bs->backing_file);
765 }
766
767 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200768 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000769 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200770
771 /* backing files always opened read-only */
772 back_flags =
773 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
774
775 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
776 if (ret < 0) {
777 bdrv_close(bs);
778 return ret;
779 }
780 if (bs->is_temporary) {
781 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
782 } else {
783 /* base image inherits from "parent" */
784 bs->backing_hd->keep_read_only = bs->keep_read_only;
785 }
786 }
787
788 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200789 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200790 }
791
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800792 /* throttling disk I/O limits */
793 if (bs->io_limits_enabled) {
794 bdrv_io_limits_enable(bs);
795 }
796
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200797 return 0;
798
799unlink_and_fail:
800 if (bs->is_temporary) {
801 unlink(filename);
802 }
803 return ret;
804}
805
bellardfc01f7e2003-06-30 10:03:06 +0000806void bdrv_close(BlockDriverState *bs)
807{
bellard19cb3732006-08-19 11:45:59 +0000808 if (bs->drv) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200809 if (bs == bs_snapshots) {
810 bs_snapshots = NULL;
811 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100812 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000813 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100814 bs->backing_hd = NULL;
815 }
bellardea2384d2004-08-01 21:59:26 +0000816 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500817 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000818#ifdef _WIN32
819 if (bs->is_temporary) {
820 unlink(bs->filename);
821 }
bellard67b915a2004-03-31 23:37:16 +0000822#endif
bellardea2384d2004-08-01 21:59:26 +0000823 bs->opaque = NULL;
824 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000825 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000826
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200827 if (bs->file != NULL) {
828 bdrv_close(bs->file);
829 }
830
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200831 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000832 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800833
834 /*throttling disk I/O limits*/
835 if (bs->io_limits_enabled) {
836 bdrv_io_limits_disable(bs);
837 }
bellardb3380822004-03-14 21:38:54 +0000838}
839
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900840void bdrv_close_all(void)
841{
842 BlockDriverState *bs;
843
844 QTAILQ_FOREACH(bs, &bdrv_states, list) {
845 bdrv_close(bs);
846 }
847}
848
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000849/*
850 * Wait for pending requests to complete across all BlockDriverStates
851 *
852 * This function does not flush data to disk, use bdrv_flush_all() for that
853 * after calling this function.
854 */
855void bdrv_drain_all(void)
856{
857 BlockDriverState *bs;
858
859 qemu_aio_flush();
860
861 /* If requests are still pending there is a bug somewhere */
862 QTAILQ_FOREACH(bs, &bdrv_states, list) {
863 assert(QLIST_EMPTY(&bs->tracked_requests));
864 assert(qemu_co_queue_empty(&bs->throttled_reqs));
865 }
866}
867
Ryan Harperd22b2f42011-03-29 20:51:47 -0500868/* make a BlockDriverState anonymous by removing from bdrv_state list.
869 Also, NULL terminate the device_name to prevent double remove */
870void bdrv_make_anon(BlockDriverState *bs)
871{
872 if (bs->device_name[0] != '\0') {
873 QTAILQ_REMOVE(&bdrv_states, bs, list);
874 }
875 bs->device_name[0] = '\0';
876}
877
bellardb3380822004-03-14 21:38:54 +0000878void bdrv_delete(BlockDriverState *bs)
879{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200880 assert(!bs->dev);
Markus Armbruster18846de2010-06-29 16:58:30 +0200881
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100882 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500883 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000884
bellardb3380822004-03-14 21:38:54 +0000885 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200886 if (bs->file != NULL) {
887 bdrv_delete(bs->file);
888 }
889
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200890 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500891 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000892}
893
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200894int bdrv_attach_dev(BlockDriverState *bs, void *dev)
895/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200896{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200897 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +0200898 return -EBUSY;
899 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200900 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -0300901 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +0200902 return 0;
903}
904
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200905/* TODO qdevified devices don't use this, remove when devices are qdevified */
906void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +0200907{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200908 if (bdrv_attach_dev(bs, dev) < 0) {
909 abort();
910 }
911}
912
913void bdrv_detach_dev(BlockDriverState *bs, void *dev)
914/* TODO change to DeviceState *dev when all users are qdevified */
915{
916 assert(bs->dev == dev);
917 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +0200918 bs->dev_ops = NULL;
919 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +0200920 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +0200921}
922
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200923/* TODO change to return DeviceState * when all users are qdevified */
924void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +0200925{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200926 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +0200927}
928
Markus Armbruster0e49de52011-08-03 15:07:41 +0200929void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
930 void *opaque)
931{
932 bs->dev_ops = ops;
933 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200934 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
935 bs_snapshots = NULL;
936 }
Markus Armbruster0e49de52011-08-03 15:07:41 +0200937}
938
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200939static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +0200940{
Markus Armbruster145feb12011-08-03 15:07:42 +0200941 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200942 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Markus Armbruster145feb12011-08-03 15:07:42 +0200943 }
944}
945
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200946bool bdrv_dev_has_removable_media(BlockDriverState *bs)
947{
948 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
949}
950
Paolo Bonzini025ccaa2011-11-07 17:50:13 +0100951void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
952{
953 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
954 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
955 }
956}
957
Markus Armbrustere4def802011-09-06 18:58:53 +0200958bool bdrv_dev_is_tray_open(BlockDriverState *bs)
959{
960 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
961 return bs->dev_ops->is_tray_open(bs->dev_opaque);
962 }
963 return false;
964}
965
Markus Armbruster145feb12011-08-03 15:07:42 +0200966static void bdrv_dev_resize_cb(BlockDriverState *bs)
967{
968 if (bs->dev_ops && bs->dev_ops->resize_cb) {
969 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +0200970 }
971}
972
Markus Armbrusterf1076392011-09-06 18:58:46 +0200973bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
974{
975 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
976 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
977 }
978 return false;
979}
980
aliguorie97fc192009-04-21 23:11:50 +0000981/*
982 * Run consistency checks on an image
983 *
Kevin Wolfe076f332010-06-29 11:43:13 +0200984 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +0200985 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +0200986 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +0000987 */
Kevin Wolfe076f332010-06-29 11:43:13 +0200988int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +0000989{
990 if (bs->drv->bdrv_check == NULL) {
991 return -ENOTSUP;
992 }
993
Kevin Wolfe076f332010-06-29 11:43:13 +0200994 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +0200995 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +0000996}
997
Kevin Wolf8a426612010-07-16 17:17:01 +0200998#define COMMIT_BUF_SECTORS 2048
999
bellard33e39632003-07-06 17:15:21 +00001000/* commit COW file into the raw image */
1001int bdrv_commit(BlockDriverState *bs)
1002{
bellard19cb3732006-08-19 11:45:59 +00001003 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +02001004 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001005 int64_t sector, total_sectors;
1006 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001007 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001008 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001009 char filename[1024];
1010 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +00001011
bellard19cb3732006-08-19 11:45:59 +00001012 if (!drv)
1013 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001014
1015 if (!bs->backing_hd) {
1016 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001017 }
1018
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001019 if (bs->backing_hd->keep_read_only) {
1020 return -EACCES;
1021 }
Kevin Wolfee181192010-08-05 13:05:22 +02001022
1023 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001024 ro = bs->backing_hd->read_only;
1025 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1026 open_flags = bs->backing_hd->open_flags;
1027
1028 if (ro) {
1029 /* re-open as RW */
1030 bdrv_delete(bs->backing_hd);
1031 bs->backing_hd = NULL;
1032 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001033 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1034 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001035 if (rw_ret < 0) {
1036 bdrv_delete(bs_rw);
1037 /* try to re-open read-only */
1038 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001039 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1040 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001041 if (ret < 0) {
1042 bdrv_delete(bs_ro);
1043 /* drive not functional anymore */
1044 bs->drv = NULL;
1045 return ret;
1046 }
1047 bs->backing_hd = bs_ro;
1048 return rw_ret;
1049 }
1050 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001051 }
bellardea2384d2004-08-01 21:59:26 +00001052
Jan Kiszka6ea44302009-11-30 18:21:19 +01001053 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001054 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001055
Kevin Wolf8a426612010-07-16 17:17:01 +02001056 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001057 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001058
1059 if (bdrv_read(bs, sector, buf, n) != 0) {
1060 ret = -EIO;
1061 goto ro_cleanup;
1062 }
1063
1064 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1065 ret = -EIO;
1066 goto ro_cleanup;
1067 }
bellardea2384d2004-08-01 21:59:26 +00001068 }
1069 }
bellard95389c82005-12-18 18:28:15 +00001070
Christoph Hellwig1d449522010-01-17 12:32:30 +01001071 if (drv->bdrv_make_empty) {
1072 ret = drv->bdrv_make_empty(bs);
1073 bdrv_flush(bs);
1074 }
bellard95389c82005-12-18 18:28:15 +00001075
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001076 /*
1077 * Make sure all data we wrote to the backing device is actually
1078 * stable on disk.
1079 */
1080 if (bs->backing_hd)
1081 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001082
1083ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001084 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001085
1086 if (ro) {
1087 /* re-open as RO */
1088 bdrv_delete(bs->backing_hd);
1089 bs->backing_hd = NULL;
1090 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001091 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1092 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001093 if (ret < 0) {
1094 bdrv_delete(bs_ro);
1095 /* drive not functional anymore */
1096 bs->drv = NULL;
1097 return ret;
1098 }
1099 bs->backing_hd = bs_ro;
1100 bs->backing_hd->keep_read_only = 0;
1101 }
1102
Christoph Hellwig1d449522010-01-17 12:32:30 +01001103 return ret;
bellard33e39632003-07-06 17:15:21 +00001104}
1105
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001106void bdrv_commit_all(void)
1107{
1108 BlockDriverState *bs;
1109
1110 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1111 bdrv_commit(bs);
1112 }
1113}
1114
/*
 * One in-flight request, linked into bs->tracked_requests by
 * tracked_request_begin() and unlinked by tracked_request_end().
 */
struct BdrvTrackedRequest {
    BlockDriverState *bs;   /* block device the request was issued on */
    int64_t sector_num;     /* first sector of the request */
    int nb_sectors;         /* number of sectors in the request */
    bool is_write;          /* is_write flag passed to tracked_request_begin() */
    QLIST_ENTRY(BdrvTrackedRequest) list; /* link in bs->tracked_requests */
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1124
1125/**
1126 * Remove an active request from the tracked requests list
1127 *
1128 * This function should be called when a tracked request is completing.
1129 */
1130static void tracked_request_end(BdrvTrackedRequest *req)
1131{
1132 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001133 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001134}
1135
1136/**
1137 * Add an active request to the tracked requests list
1138 */
1139static void tracked_request_begin(BdrvTrackedRequest *req,
1140 BlockDriverState *bs,
1141 int64_t sector_num,
1142 int nb_sectors, bool is_write)
1143{
1144 *req = (BdrvTrackedRequest){
1145 .bs = bs,
1146 .sector_num = sector_num,
1147 .nb_sectors = nb_sectors,
1148 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001149 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001150 };
1151
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001152 qemu_co_queue_init(&req->wait_queue);
1153
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001154 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1155}
1156
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001157/**
1158 * Round a region to cluster boundaries
1159 */
1160static void round_to_clusters(BlockDriverState *bs,
1161 int64_t sector_num, int nb_sectors,
1162 int64_t *cluster_sector_num,
1163 int *cluster_nb_sectors)
1164{
1165 BlockDriverInfo bdi;
1166
1167 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1168 *cluster_sector_num = sector_num;
1169 *cluster_nb_sectors = nb_sectors;
1170 } else {
1171 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1172 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1173 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1174 nb_sectors, c);
1175 }
1176}
1177
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001178static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1179 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001180 /* aaaa bbbb */
1181 if (sector_num >= req->sector_num + req->nb_sectors) {
1182 return false;
1183 }
1184 /* bbbb aaaa */
1185 if (req->sector_num >= sector_num + nb_sectors) {
1186 return false;
1187 }
1188 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001189}
1190
1191static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1192 int64_t sector_num, int nb_sectors)
1193{
1194 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001195 int64_t cluster_sector_num;
1196 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001197 bool retry;
1198
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001199 /* If we touch the same cluster it counts as an overlap. This guarantees
1200 * that allocating writes will be serialized and not race with each other
1201 * for the same cluster. For example, in copy-on-read it ensures that the
1202 * CoR read and write operations are atomic and guest writes cannot
1203 * interleave between them.
1204 */
1205 round_to_clusters(bs, sector_num, nb_sectors,
1206 &cluster_sector_num, &cluster_nb_sectors);
1207
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001208 do {
1209 retry = false;
1210 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001211 if (tracked_request_overlaps(req, cluster_sector_num,
1212 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001213 /* Hitting this means there was a reentrant request, for
1214 * example, a block driver issuing nested requests. This must
1215 * never happen since it means deadlock.
1216 */
1217 assert(qemu_coroutine_self() != req->co);
1218
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001219 qemu_co_queue_wait(&req->wait_queue);
1220 retry = true;
1221 break;
1222 }
1223 }
1224 } while (retry);
1225}
1226
Kevin Wolf756e6732010-01-12 12:55:17 +01001227/*
1228 * Return values:
1229 * 0 - success
1230 * -EINVAL - backing format specified, but no file
1231 * -ENOSPC - can't update the backing file because no space is left in the
1232 * image file header
1233 * -ENOTSUP - format driver doesn't support changing the backing file
1234 */
1235int bdrv_change_backing_file(BlockDriverState *bs,
1236 const char *backing_file, const char *backing_fmt)
1237{
1238 BlockDriver *drv = bs->drv;
1239
1240 if (drv->bdrv_change_backing_file != NULL) {
1241 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1242 } else {
1243 return -ENOTSUP;
1244 }
1245}
1246
aliguori71d07702009-03-03 17:37:16 +00001247static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1248 size_t size)
1249{
1250 int64_t len;
1251
1252 if (!bdrv_is_inserted(bs))
1253 return -ENOMEDIUM;
1254
1255 if (bs->growable)
1256 return 0;
1257
1258 len = bdrv_getlength(bs);
1259
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001260 if (offset < 0)
1261 return -EIO;
1262
1263 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001264 return -EIO;
1265
1266 return 0;
1267}
1268
1269static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1270 int nb_sectors)
1271{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001272 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1273 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001274}
1275
/*
 * Arguments and result of one synchronous read or write that bdrv_rw_co()
 * hands to bdrv_rw_co_entry().
 */
typedef struct RwCo {
    BlockDriverState *bs;   /* device to operate on */
    int64_t sector_num;     /* first sector */
    int nb_sectors;         /* number of sectors */
    QEMUIOVector *qiov;     /* I/O vector describing the data buffer */
    bool is_write;          /* true: write request, false: read request */
    int ret;                /* NOT_DONE until bdrv_rw_co_entry() stores the result */
} RwCo;
1284
1285static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1286{
1287 RwCo *rwco = opaque;
1288
1289 if (!rwco->is_write) {
1290 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1291 rwco->nb_sectors, rwco->qiov);
1292 } else {
1293 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1294 rwco->nb_sectors, rwco->qiov);
1295 }
1296}
1297
1298/*
1299 * Process a synchronous request using coroutines
1300 */
1301static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1302 int nb_sectors, bool is_write)
1303{
1304 QEMUIOVector qiov;
1305 struct iovec iov = {
1306 .iov_base = (void *)buf,
1307 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1308 };
1309 Coroutine *co;
1310 RwCo rwco = {
1311 .bs = bs,
1312 .sector_num = sector_num,
1313 .nb_sectors = nb_sectors,
1314 .qiov = &qiov,
1315 .is_write = is_write,
1316 .ret = NOT_DONE,
1317 };
1318
1319 qemu_iovec_init_external(&qiov, &iov, 1);
1320
1321 if (qemu_in_coroutine()) {
1322 /* Fast-path if already in coroutine context */
1323 bdrv_rw_co_entry(&rwco);
1324 } else {
1325 co = qemu_coroutine_create(bdrv_rw_co_entry);
1326 qemu_coroutine_enter(co, &rwco);
1327 while (rwco.ret == NOT_DONE) {
1328 qemu_aio_wait();
1329 }
1330 }
1331 return rwco.ret;
1332}
1333
bellard19cb3732006-08-19 11:45:59 +00001334/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001335int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001336 uint8_t *buf, int nb_sectors)
1337{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001338 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001339}
1340
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001341static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001342 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001343{
1344 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001345 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001346
Jan Kiszka6ea44302009-11-30 18:21:19 +01001347 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001348 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001349
1350 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001351 idx = start / (sizeof(unsigned long) * 8);
1352 bit = start % (sizeof(unsigned long) * 8);
1353 val = bs->dirty_bitmap[idx];
1354 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001355 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001356 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001357 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001358 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001359 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001360 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001361 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001362 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001363 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001364 }
1365 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001366 }
1367}
1368
ths5fafdf22007-09-16 21:08:06 +00001369/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001370 -EIO generic I/O error (may happen for all errors)
1371 -ENOMEDIUM No media inserted.
1372 -EINVAL Invalid sector number or nb_sectors
1373 -EACCES Trying to write a read-only device
1374*/
ths5fafdf22007-09-16 21:08:06 +00001375int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001376 const uint8_t *buf, int nb_sectors)
1377{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001378 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001379}
1380
aliguorieda578e2009-03-12 19:57:16 +00001381int bdrv_pread(BlockDriverState *bs, int64_t offset,
1382 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001383{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001384 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001385 int len, nb_sectors, count;
1386 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001387 int ret;
bellard83f64092006-08-01 16:21:11 +00001388
1389 count = count1;
1390 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001391 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001392 if (len > count)
1393 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001394 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001395 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001396 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1397 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001398 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001399 count -= len;
1400 if (count == 0)
1401 return count1;
1402 sector_num++;
1403 buf += len;
1404 }
1405
1406 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001407 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001408 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001409 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1410 return ret;
bellard83f64092006-08-01 16:21:11 +00001411 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001412 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001413 buf += len;
1414 count -= len;
1415 }
1416
1417 /* add data from the last sector */
1418 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001419 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1420 return ret;
bellard83f64092006-08-01 16:21:11 +00001421 memcpy(buf, tmp_buf, count);
1422 }
1423 return count1;
1424}
1425
aliguorieda578e2009-03-12 19:57:16 +00001426int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1427 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001428{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001429 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001430 int len, nb_sectors, count;
1431 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001432 int ret;
bellard83f64092006-08-01 16:21:11 +00001433
1434 count = count1;
1435 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001436 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001437 if (len > count)
1438 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001439 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001440 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001441 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1442 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001443 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001444 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1445 return ret;
bellard83f64092006-08-01 16:21:11 +00001446 count -= len;
1447 if (count == 0)
1448 return count1;
1449 sector_num++;
1450 buf += len;
1451 }
1452
1453 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001454 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001455 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001456 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1457 return ret;
bellard83f64092006-08-01 16:21:11 +00001458 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001459 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001460 buf += len;
1461 count -= len;
1462 }
1463
1464 /* add data from the last sector */
1465 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001466 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1467 return ret;
bellard83f64092006-08-01 16:21:11 +00001468 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001469 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1470 return ret;
bellard83f64092006-08-01 16:21:11 +00001471 }
1472 return count1;
1473}
bellard83f64092006-08-01 16:21:11 +00001474
Kevin Wolff08145f2010-06-16 16:38:15 +02001475/*
1476 * Writes to the file and ensures that no writes are reordered across this
1477 * request (acts as a barrier)
1478 *
1479 * Returns 0 on success, -errno in error cases.
1480 */
1481int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1482 const void *buf, int count)
1483{
1484 int ret;
1485
1486 ret = bdrv_pwrite(bs, offset, buf, count);
1487 if (ret < 0) {
1488 return ret;
1489 }
1490
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001491 /* No flush needed for cache modes that use O_DSYNC */
1492 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001493 bdrv_flush(bs);
1494 }
1495
1496 return 0;
1497}
1498
Stefan Hajnocziab185922011-11-17 13:40:31 +00001499static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1500 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1501{
1502 /* Perform I/O through a temporary buffer so that users who scribble over
1503 * their read buffer while the operation is in progress do not end up
1504 * modifying the image file. This is critical for zero-copy guest I/O
1505 * where anything might happen inside guest memory.
1506 */
1507 void *bounce_buffer;
1508
1509 struct iovec iov;
1510 QEMUIOVector bounce_qiov;
1511 int64_t cluster_sector_num;
1512 int cluster_nb_sectors;
1513 size_t skip_bytes;
1514 int ret;
1515
1516 /* Cover entire cluster so no additional backing file I/O is required when
1517 * allocating cluster in the image file.
1518 */
1519 round_to_clusters(bs, sector_num, nb_sectors,
1520 &cluster_sector_num, &cluster_nb_sectors);
1521
1522 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
1523 cluster_sector_num, cluster_nb_sectors);
1524
1525 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1526 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1527 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1528
1529 ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1530 &bounce_qiov);
1531 if (ret < 0) {
1532 goto err;
1533 }
1534
1535 ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
1536 &bounce_qiov);
1537 if (ret < 0) {
1538 /* It might be okay to ignore write errors for guest requests. If this
1539 * is a deliberate copy-on-read then we don't want to ignore the error.
1540 * Simply report it in all cases.
1541 */
1542 goto err;
1543 }
1544
1545 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1546 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1547 nb_sectors * BDRV_SECTOR_SIZE);
1548
1549err:
1550 qemu_vfree(bounce_buffer);
1551 return ret;
1552}
1553
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001554/*
1555 * Handle a read request in coroutine context
1556 */
1557static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1558 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001559{
1560 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001561 BdrvTrackedRequest req;
1562 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001563
Kevin Wolfda1fa912011-07-14 17:27:13 +02001564 if (!drv) {
1565 return -ENOMEDIUM;
1566 }
1567 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1568 return -EIO;
1569 }
1570
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001571 /* throttling disk read I/O */
1572 if (bs->io_limits_enabled) {
1573 bdrv_io_limits_intercept(bs, false, nb_sectors);
1574 }
1575
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001576 if (bs->copy_on_read) {
1577 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1578 }
1579
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001580 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001581
1582 if (bs->copy_on_read) {
1583 int pnum;
1584
1585 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1586 if (ret < 0) {
1587 goto out;
1588 }
1589
1590 if (!ret || pnum != nb_sectors) {
1591 ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
1592 goto out;
1593 }
1594 }
1595
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001596 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001597
1598out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001599 tracked_request_end(&req);
1600 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001601}
1602
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001603int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001604 int nb_sectors, QEMUIOVector *qiov)
1605{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001606 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001607
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001608 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1609}
1610
1611/*
1612 * Handle a write request in coroutine context
1613 */
1614static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1615 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1616{
1617 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001618 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001619 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001620
1621 if (!bs->drv) {
1622 return -ENOMEDIUM;
1623 }
1624 if (bs->read_only) {
1625 return -EACCES;
1626 }
1627 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1628 return -EIO;
1629 }
1630
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001631 /* throttling disk write I/O */
1632 if (bs->io_limits_enabled) {
1633 bdrv_io_limits_intercept(bs, true, nb_sectors);
1634 }
1635
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001636 if (bs->copy_on_read) {
1637 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1638 }
1639
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001640 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1641
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001642 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1643
Kevin Wolfda1fa912011-07-14 17:27:13 +02001644 if (bs->dirty_bitmap) {
1645 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1646 }
1647
1648 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1649 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1650 }
1651
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001652 tracked_request_end(&req);
1653
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001654 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001655}
1656
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001657int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1658 int nb_sectors, QEMUIOVector *qiov)
1659{
1660 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1661
1662 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1663}
1664
bellard83f64092006-08-01 16:21:11 +00001665/**
bellard83f64092006-08-01 16:21:11 +00001666 * Truncate file to 'offset' bytes (needed only for file protocols)
1667 */
1668int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1669{
1670 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001671 int ret;
bellard83f64092006-08-01 16:21:11 +00001672 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001673 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001674 if (!drv->bdrv_truncate)
1675 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001676 if (bs->read_only)
1677 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001678 if (bdrv_in_use(bs))
1679 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001680 ret = drv->bdrv_truncate(bs, offset);
1681 if (ret == 0) {
1682 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001683 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001684 }
1685 return ret;
bellard83f64092006-08-01 16:21:11 +00001686}
1687
1688/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001689 * Length of a allocated file in bytes. Sparse files are counted by actual
1690 * allocated space. Return < 0 if error or unknown.
1691 */
1692int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1693{
1694 BlockDriver *drv = bs->drv;
1695 if (!drv) {
1696 return -ENOMEDIUM;
1697 }
1698 if (drv->bdrv_get_allocated_file_size) {
1699 return drv->bdrv_get_allocated_file_size(bs);
1700 }
1701 if (bs->file) {
1702 return bdrv_get_allocated_file_size(bs->file);
1703 }
1704 return -ENOTSUP;
1705}
1706
1707/**
bellard83f64092006-08-01 16:21:11 +00001708 * Length of a file in bytes. Return < 0 if error or unknown.
1709 */
1710int64_t bdrv_getlength(BlockDriverState *bs)
1711{
1712 BlockDriver *drv = bs->drv;
1713 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001714 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001715
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001716 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001717 if (drv->bdrv_getlength) {
1718 return drv->bdrv_getlength(bs);
1719 }
bellard83f64092006-08-01 16:21:11 +00001720 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001721 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001722}
1723
bellard19cb3732006-08-19 11:45:59 +00001724/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001725void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001726{
bellard19cb3732006-08-19 11:45:59 +00001727 int64_t length;
1728 length = bdrv_getlength(bs);
1729 if (length < 0)
1730 length = 0;
1731 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001732 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001733 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001734}
bellardcf989512004-02-16 21:56:36 +00001735
/* One partition table entry as read from the first sector (offset 0x1be) */
struct partition {
    uint8_t  boot_ind;      /* 0x80 - active */
    uint8_t  head;          /* starting head */
    uint8_t  sector;        /* starting sector */
    uint8_t  cyl;           /* starting cylinder */
    uint8_t  sys_ind;       /* What partition type */
    uint8_t  end_head;      /* end head */
    uint8_t  end_sector;    /* end sector */
    uint8_t  end_cyl;       /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001748
1749/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1750static int guess_disk_lchs(BlockDriverState *bs,
1751 int *pcylinders, int *pheads, int *psectors)
1752{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001753 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001754 int ret, i, heads, sectors, cylinders;
1755 struct partition *p;
1756 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001757 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001758
1759 bdrv_get_geometry(bs, &nb_sectors);
1760
1761 ret = bdrv_read(bs, 0, buf, 1);
1762 if (ret < 0)
1763 return -1;
1764 /* test msdos magic */
1765 if (buf[510] != 0x55 || buf[511] != 0xaa)
1766 return -1;
1767 for(i = 0; i < 4; i++) {
1768 p = ((struct partition *)(buf + 0x1be)) + i;
1769 nr_sects = le32_to_cpu(p->nr_sects);
1770 if (nr_sects && p->end_head) {
1771 /* We make the assumption that the partition terminates on
1772 a cylinder boundary */
1773 heads = p->end_head + 1;
1774 sectors = p->end_sector & 63;
1775 if (sectors == 0)
1776 continue;
1777 cylinders = nb_sectors / (heads * sectors);
1778 if (cylinders < 1 || cylinders > 16383)
1779 continue;
1780 *pheads = heads;
1781 *psectors = sectors;
1782 *pcylinders = cylinders;
1783#if 0
1784 printf("guessed geometry: LCHS=%d %d %d\n",
1785 cylinders, heads, sectors);
1786#endif
1787 return 0;
1788 }
1789 }
1790 return -1;
1791}
1792
1793void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1794{
1795 int translation, lba_detected = 0;
1796 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00001797 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001798
1799 /* if a geometry hint is available, use it */
1800 bdrv_get_geometry(bs, &nb_sectors);
1801 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1802 translation = bdrv_get_translation_hint(bs);
1803 if (cylinders != 0) {
1804 *pcyls = cylinders;
1805 *pheads = heads;
1806 *psecs = secs;
1807 } else {
1808 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1809 if (heads > 16) {
1810 /* if heads > 16, it means that a BIOS LBA
1811 translation was active, so the default
1812 hardware geometry is OK */
1813 lba_detected = 1;
1814 goto default_geometry;
1815 } else {
1816 *pcyls = cylinders;
1817 *pheads = heads;
1818 *psecs = secs;
1819 /* disable any translation to be in sync with
1820 the logical geometry */
1821 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1822 bdrv_set_translation_hint(bs,
1823 BIOS_ATA_TRANSLATION_NONE);
1824 }
1825 }
1826 } else {
1827 default_geometry:
1828 /* if no geometry, use a standard physical disk geometry */
1829 cylinders = nb_sectors / (16 * 63);
1830
1831 if (cylinders > 16383)
1832 cylinders = 16383;
1833 else if (cylinders < 2)
1834 cylinders = 2;
1835 *pcyls = cylinders;
1836 *pheads = 16;
1837 *psecs = 63;
1838 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1839 if ((*pcyls * *pheads) <= 131072) {
1840 bdrv_set_translation_hint(bs,
1841 BIOS_ATA_TRANSLATION_LARGE);
1842 } else {
1843 bdrv_set_translation_hint(bs,
1844 BIOS_ATA_TRANSLATION_LBA);
1845 }
1846 }
1847 }
1848 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1849 }
1850}
1851
ths5fafdf22007-09-16 21:08:06 +00001852void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001853 int cyls, int heads, int secs)
1854{
1855 bs->cyls = cyls;
1856 bs->heads = heads;
1857 bs->secs = secs;
1858}
1859
bellard46d47672004-11-16 01:45:27 +00001860void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1861{
1862 bs->translation = translation;
1863}
1864
ths5fafdf22007-09-16 21:08:06 +00001865void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001866 int *pcyls, int *pheads, int *psecs)
1867{
1868 *pcyls = bs->cyls;
1869 *pheads = bs->heads;
1870 *psecs = bs->secs;
1871}
1872
Zhi Yong Wu0563e192011-11-03 16:57:25 +08001873/* throttling disk io limits */
1874void bdrv_set_io_limits(BlockDriverState *bs,
1875 BlockIOLimit *io_limits)
1876{
1877 bs->io_limits = *io_limits;
1878 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1879}
1880
Blue Swirl5bbdbb42011-02-12 20:43:32 +00001881/* Recognize floppy formats */
1882typedef struct FDFormat {
1883 FDriveType drive;
1884 uint8_t last_sect;
1885 uint8_t max_track;
1886 uint8_t max_head;
1887} FDFormat;
1888
1889static const FDFormat fd_formats[] = {
1890 /* First entry is default format */
1891 /* 1.44 MB 3"1/2 floppy disks */
1892 { FDRIVE_DRV_144, 18, 80, 1, },
1893 { FDRIVE_DRV_144, 20, 80, 1, },
1894 { FDRIVE_DRV_144, 21, 80, 1, },
1895 { FDRIVE_DRV_144, 21, 82, 1, },
1896 { FDRIVE_DRV_144, 21, 83, 1, },
1897 { FDRIVE_DRV_144, 22, 80, 1, },
1898 { FDRIVE_DRV_144, 23, 80, 1, },
1899 { FDRIVE_DRV_144, 24, 80, 1, },
1900 /* 2.88 MB 3"1/2 floppy disks */
1901 { FDRIVE_DRV_288, 36, 80, 1, },
1902 { FDRIVE_DRV_288, 39, 80, 1, },
1903 { FDRIVE_DRV_288, 40, 80, 1, },
1904 { FDRIVE_DRV_288, 44, 80, 1, },
1905 { FDRIVE_DRV_288, 48, 80, 1, },
1906 /* 720 kB 3"1/2 floppy disks */
1907 { FDRIVE_DRV_144, 9, 80, 1, },
1908 { FDRIVE_DRV_144, 10, 80, 1, },
1909 { FDRIVE_DRV_144, 10, 82, 1, },
1910 { FDRIVE_DRV_144, 10, 83, 1, },
1911 { FDRIVE_DRV_144, 13, 80, 1, },
1912 { FDRIVE_DRV_144, 14, 80, 1, },
1913 /* 1.2 MB 5"1/4 floppy disks */
1914 { FDRIVE_DRV_120, 15, 80, 1, },
1915 { FDRIVE_DRV_120, 18, 80, 1, },
1916 { FDRIVE_DRV_120, 18, 82, 1, },
1917 { FDRIVE_DRV_120, 18, 83, 1, },
1918 { FDRIVE_DRV_120, 20, 80, 1, },
1919 /* 720 kB 5"1/4 floppy disks */
1920 { FDRIVE_DRV_120, 9, 80, 1, },
1921 { FDRIVE_DRV_120, 11, 80, 1, },
1922 /* 360 kB 5"1/4 floppy disks */
1923 { FDRIVE_DRV_120, 9, 40, 1, },
1924 { FDRIVE_DRV_120, 9, 40, 0, },
1925 { FDRIVE_DRV_120, 10, 41, 1, },
1926 { FDRIVE_DRV_120, 10, 42, 1, },
1927 /* 320 kB 5"1/4 floppy disks */
1928 { FDRIVE_DRV_120, 8, 40, 1, },
1929 { FDRIVE_DRV_120, 8, 40, 0, },
1930 /* 360 kB must match 5"1/4 better than 3"1/2... */
1931 { FDRIVE_DRV_144, 9, 80, 0, },
1932 /* end */
1933 { FDRIVE_DRV_NONE, -1, -1, 0, },
1934};
1935
1936void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1937 int *max_track, int *last_sect,
1938 FDriveType drive_in, FDriveType *drive)
1939{
1940 const FDFormat *parse;
1941 uint64_t nb_sectors, size;
1942 int i, first_match, match;
1943
1944 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1945 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1946 /* User defined disk */
1947 } else {
1948 bdrv_get_geometry(bs, &nb_sectors);
1949 match = -1;
1950 first_match = -1;
1951 for (i = 0; ; i++) {
1952 parse = &fd_formats[i];
1953 if (parse->drive == FDRIVE_DRV_NONE) {
1954 break;
1955 }
1956 if (drive_in == parse->drive ||
1957 drive_in == FDRIVE_DRV_NONE) {
1958 size = (parse->max_head + 1) * parse->max_track *
1959 parse->last_sect;
1960 if (nb_sectors == size) {
1961 match = i;
1962 break;
1963 }
1964 if (first_match == -1) {
1965 first_match = i;
1966 }
1967 }
1968 }
1969 if (match == -1) {
1970 if (first_match == -1) {
1971 match = 1;
1972 } else {
1973 match = first_match;
1974 }
1975 parse = &fd_formats[match];
1976 }
1977 *nb_heads = parse->max_head + 1;
1978 *max_track = parse->max_track;
1979 *last_sect = parse->last_sect;
1980 *drive = parse->drive;
1981 }
1982}
1983
bellard46d47672004-11-16 01:45:27 +00001984int bdrv_get_translation_hint(BlockDriverState *bs)
1985{
1986 return bs->translation;
1987}
1988
Markus Armbrusterabd7f682010-06-02 18:55:17 +02001989void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1990 BlockErrorAction on_write_error)
1991{
1992 bs->on_read_error = on_read_error;
1993 bs->on_write_error = on_write_error;
1994}
1995
1996BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1997{
1998 return is_read ? bs->on_read_error : bs->on_write_error;
1999}
2000
bellardb3380822004-03-14 21:38:54 +00002001int bdrv_is_read_only(BlockDriverState *bs)
2002{
2003 return bs->read_only;
2004}
2005
ths985a03b2007-12-24 16:10:43 +00002006int bdrv_is_sg(BlockDriverState *bs)
2007{
2008 return bs->sg;
2009}
2010
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002011int bdrv_enable_write_cache(BlockDriverState *bs)
2012{
2013 return bs->enable_write_cache;
2014}
2015
bellardea2384d2004-08-01 21:59:26 +00002016int bdrv_is_encrypted(BlockDriverState *bs)
2017{
2018 if (bs->backing_hd && bs->backing_hd->encrypted)
2019 return 1;
2020 return bs->encrypted;
2021}
2022
aliguoric0f4ce72009-03-05 23:01:01 +00002023int bdrv_key_required(BlockDriverState *bs)
2024{
2025 BlockDriverState *backing_hd = bs->backing_hd;
2026
2027 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2028 return 1;
2029 return (bs->encrypted && !bs->valid_key);
2030}
2031
bellardea2384d2004-08-01 21:59:26 +00002032int bdrv_set_key(BlockDriverState *bs, const char *key)
2033{
2034 int ret;
2035 if (bs->backing_hd && bs->backing_hd->encrypted) {
2036 ret = bdrv_set_key(bs->backing_hd, key);
2037 if (ret < 0)
2038 return ret;
2039 if (!bs->encrypted)
2040 return 0;
2041 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002042 if (!bs->encrypted) {
2043 return -EINVAL;
2044 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2045 return -ENOMEDIUM;
2046 }
aliguoric0f4ce72009-03-05 23:01:01 +00002047 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002048 if (ret < 0) {
2049 bs->valid_key = 0;
2050 } else if (!bs->valid_key) {
2051 bs->valid_key = 1;
2052 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002053 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002054 }
aliguoric0f4ce72009-03-05 23:01:01 +00002055 return ret;
bellardea2384d2004-08-01 21:59:26 +00002056}
2057
2058void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2059{
bellard19cb3732006-08-19 11:45:59 +00002060 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002061 buf[0] = '\0';
2062 } else {
2063 pstrcpy(buf, buf_size, bs->drv->format_name);
2064 }
2065}
2066
ths5fafdf22007-09-16 21:08:06 +00002067void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002068 void *opaque)
2069{
2070 BlockDriver *drv;
2071
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002072 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002073 it(opaque, drv->format_name);
2074 }
2075}
2076
bellardb3380822004-03-14 21:38:54 +00002077BlockDriverState *bdrv_find(const char *name)
2078{
2079 BlockDriverState *bs;
2080
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002081 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2082 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002083 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002084 }
bellardb3380822004-03-14 21:38:54 +00002085 }
2086 return NULL;
2087}
2088
Markus Armbruster2f399b02010-06-02 18:55:20 +02002089BlockDriverState *bdrv_next(BlockDriverState *bs)
2090{
2091 if (!bs) {
2092 return QTAILQ_FIRST(&bdrv_states);
2093 }
2094 return QTAILQ_NEXT(bs, list);
2095}
2096
aliguori51de9762009-03-05 23:00:43 +00002097void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002098{
2099 BlockDriverState *bs;
2100
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002101 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002102 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002103 }
2104}
2105
bellardea2384d2004-08-01 21:59:26 +00002106const char *bdrv_get_device_name(BlockDriverState *bs)
2107{
2108 return bs->device_name;
2109}
2110
aliguoric6ca28d2008-10-06 13:55:43 +00002111void bdrv_flush_all(void)
2112{
2113 BlockDriverState *bs;
2114
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002115 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Markus Armbrusterc602a482011-08-03 15:08:10 +02002116 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
aliguoric6ca28d2008-10-06 13:55:43 +00002117 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002118 }
2119 }
aliguoric6ca28d2008-10-06 13:55:43 +00002120}
2121
Kevin Wolff2feebb2010-04-14 17:30:35 +02002122int bdrv_has_zero_init(BlockDriverState *bs)
2123{
2124 assert(bs->drv);
2125
Kevin Wolf336c1c12010-07-28 11:26:29 +02002126 if (bs->drv->bdrv_has_zero_init) {
2127 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002128 }
2129
2130 return 1;
2131}
2132
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002133typedef struct BdrvCoIsAllocatedData {
2134 BlockDriverState *bs;
2135 int64_t sector_num;
2136 int nb_sectors;
2137 int *pnum;
2138 int ret;
2139 bool done;
2140} BdrvCoIsAllocatedData;
2141
thsf58c7b32008-06-05 21:53:49 +00002142/*
2143 * Returns true iff the specified sector is present in the disk image. Drivers
2144 * not implementing the functionality are assumed to not support backing files,
2145 * hence all their sectors are reported as allocated.
2146 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002147 * If 'sector_num' is beyond the end of the disk image the return value is 0
2148 * and 'pnum' is set to 0.
2149 *
thsf58c7b32008-06-05 21:53:49 +00002150 * 'pnum' is set to the number of sectors (including and immediately following
2151 * the specified sector) that are known to be in the same
2152 * allocated/unallocated state.
2153 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002154 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2155 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002156 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002157int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2158 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002159{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002160 int64_t n;
2161
2162 if (sector_num >= bs->total_sectors) {
2163 *pnum = 0;
2164 return 0;
2165 }
2166
2167 n = bs->total_sectors - sector_num;
2168 if (n < nb_sectors) {
2169 nb_sectors = n;
2170 }
2171
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002172 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002173 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002174 return 1;
2175 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002176
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002177 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2178}
2179
2180/* Coroutine wrapper for bdrv_is_allocated() */
2181static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2182{
2183 BdrvCoIsAllocatedData *data = opaque;
2184 BlockDriverState *bs = data->bs;
2185
2186 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2187 data->pnum);
2188 data->done = true;
2189}
2190
2191/*
2192 * Synchronous wrapper around bdrv_co_is_allocated().
2193 *
2194 * See bdrv_co_is_allocated() for details.
2195 */
2196int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2197 int *pnum)
2198{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002199 Coroutine *co;
2200 BdrvCoIsAllocatedData data = {
2201 .bs = bs,
2202 .sector_num = sector_num,
2203 .nb_sectors = nb_sectors,
2204 .pnum = pnum,
2205 .done = false,
2206 };
2207
2208 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2209 qemu_coroutine_enter(co, &data);
2210 while (!data.done) {
2211 qemu_aio_wait();
2212 }
2213 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002214}
2215
Luiz Capitulino2582bfe2010-02-03 12:41:01 -02002216void bdrv_mon_event(const BlockDriverState *bdrv,
2217 BlockMonEventAction action, int is_read)
2218{
2219 QObject *data;
2220 const char *action_str;
2221
2222 switch (action) {
2223 case BDRV_ACTION_REPORT:
2224 action_str = "report";
2225 break;
2226 case BDRV_ACTION_IGNORE:
2227 action_str = "ignore";
2228 break;
2229 case BDRV_ACTION_STOP:
2230 action_str = "stop";
2231 break;
2232 default:
2233 abort();
2234 }
2235
2236 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2237 bdrv->device_name,
2238 action_str,
2239 is_read ? "read" : "write");
2240 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2241
2242 qobject_decref(data);
2243}
2244
Luiz Capitulinob2023812011-09-21 17:16:47 -03002245BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002246{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002247 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002248 BlockDriverState *bs;
2249
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002250 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002251 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002252
Luiz Capitulinob2023812011-09-21 17:16:47 -03002253 info->value = g_malloc0(sizeof(*info->value));
2254 info->value->device = g_strdup(bs->device_name);
2255 info->value->type = g_strdup("unknown");
2256 info->value->locked = bdrv_dev_is_medium_locked(bs);
2257 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002258
Markus Armbrustere4def802011-09-06 18:58:53 +02002259 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002260 info->value->has_tray_open = true;
2261 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002262 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002263
2264 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002265 info->value->has_io_status = true;
2266 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002267 }
2268
bellard19cb3732006-08-19 11:45:59 +00002269 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002270 info->value->has_inserted = true;
2271 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2272 info->value->inserted->file = g_strdup(bs->filename);
2273 info->value->inserted->ro = bs->read_only;
2274 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2275 info->value->inserted->encrypted = bs->encrypted;
2276 if (bs->backing_file[0]) {
2277 info->value->inserted->has_backing_file = true;
2278 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002279 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002280
2281 if (bs->io_limits_enabled) {
2282 info->value->inserted->bps =
2283 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2284 info->value->inserted->bps_rd =
2285 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2286 info->value->inserted->bps_wr =
2287 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2288 info->value->inserted->iops =
2289 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2290 info->value->inserted->iops_rd =
2291 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2292 info->value->inserted->iops_wr =
2293 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2294 }
bellardb3380822004-03-14 21:38:54 +00002295 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002296
2297 /* XXX: waiting for the qapi to support GSList */
2298 if (!cur_item) {
2299 head = cur_item = info;
2300 } else {
2301 cur_item->next = info;
2302 cur_item = info;
2303 }
bellardb3380822004-03-14 21:38:54 +00002304 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002305
Luiz Capitulinob2023812011-09-21 17:16:47 -03002306 return head;
bellardb3380822004-03-14 21:38:54 +00002307}
thsa36e69d2007-12-02 05:18:19 +00002308
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002309/* Consider exposing this as a full fledged QMP command */
2310static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002311{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002312 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002313
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002314 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002315
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002316 if (bs->device_name[0]) {
2317 s->has_device = true;
2318 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002319 }
2320
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002321 s->stats = g_malloc0(sizeof(*s->stats));
2322 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2323 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2324 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2325 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2326 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2327 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2328 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2329 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2330 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2331
Kevin Wolf294cc352010-04-28 14:34:01 +02002332 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002333 s->has_parent = true;
2334 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002335 }
2336
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002337 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002338}
2339
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002340BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002341{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002342 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002343 BlockDriverState *bs;
2344
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002345 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002346 BlockStatsList *info = g_malloc0(sizeof(*info));
2347 info->value = qmp_query_blockstat(bs, NULL);
2348
2349 /* XXX: waiting for the qapi to support GSList */
2350 if (!cur_item) {
2351 head = cur_item = info;
2352 } else {
2353 cur_item->next = info;
2354 cur_item = info;
2355 }
thsa36e69d2007-12-02 05:18:19 +00002356 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002357
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002358 return head;
thsa36e69d2007-12-02 05:18:19 +00002359}
bellardea2384d2004-08-01 21:59:26 +00002360
aliguori045df332009-03-05 23:00:48 +00002361const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2362{
2363 if (bs->backing_hd && bs->backing_hd->encrypted)
2364 return bs->backing_file;
2365 else if (bs->encrypted)
2366 return bs->filename;
2367 else
2368 return NULL;
2369}
2370
ths5fafdf22007-09-16 21:08:06 +00002371void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002372 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002373{
Kevin Wolf3574c602011-10-26 11:02:11 +02002374 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002375}
2376
ths5fafdf22007-09-16 21:08:06 +00002377int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002378 const uint8_t *buf, int nb_sectors)
2379{
2380 BlockDriver *drv = bs->drv;
2381 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002382 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002383 if (!drv->bdrv_write_compressed)
2384 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002385 if (bdrv_check_request(bs, sector_num, nb_sectors))
2386 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002387
Jan Kiszkac6d22832009-11-30 18:21:20 +01002388 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002389 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2390 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002391
bellardfaea38e2006-08-05 21:31:00 +00002392 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2393}
ths3b46e622007-09-17 08:09:54 +00002394
bellardfaea38e2006-08-05 21:31:00 +00002395int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2396{
2397 BlockDriver *drv = bs->drv;
2398 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002399 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002400 if (!drv->bdrv_get_info)
2401 return -ENOTSUP;
2402 memset(bdi, 0, sizeof(*bdi));
2403 return drv->bdrv_get_info(bs, bdi);
2404}
2405
Christoph Hellwig45566e92009-07-10 23:11:57 +02002406int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2407 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002408{
2409 BlockDriver *drv = bs->drv;
2410 if (!drv)
2411 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002412 if (drv->bdrv_save_vmstate)
2413 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2414 if (bs->file)
2415 return bdrv_save_vmstate(bs->file, buf, pos, size);
2416 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002417}
2418
Christoph Hellwig45566e92009-07-10 23:11:57 +02002419int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2420 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002421{
2422 BlockDriver *drv = bs->drv;
2423 if (!drv)
2424 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002425 if (drv->bdrv_load_vmstate)
2426 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2427 if (bs->file)
2428 return bdrv_load_vmstate(bs->file, buf, pos, size);
2429 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002430}
2431
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002432void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2433{
2434 BlockDriver *drv = bs->drv;
2435
2436 if (!drv || !drv->bdrv_debug_event) {
2437 return;
2438 }
2439
2440 return drv->bdrv_debug_event(bs, event);
2441
2442}
2443
bellardfaea38e2006-08-05 21:31:00 +00002444/**************************************************************/
2445/* handling of snapshots */
2446
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002447int bdrv_can_snapshot(BlockDriverState *bs)
2448{
2449 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002450 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002451 return 0;
2452 }
2453
2454 if (!drv->bdrv_snapshot_create) {
2455 if (bs->file != NULL) {
2456 return bdrv_can_snapshot(bs->file);
2457 }
2458 return 0;
2459 }
2460
2461 return 1;
2462}
2463
Blue Swirl199630b2010-07-25 20:49:34 +00002464int bdrv_is_snapshot(BlockDriverState *bs)
2465{
2466 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2467}
2468
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002469BlockDriverState *bdrv_snapshots(void)
2470{
2471 BlockDriverState *bs;
2472
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002473 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002474 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002475 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002476
2477 bs = NULL;
2478 while ((bs = bdrv_next(bs))) {
2479 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002480 bs_snapshots = bs;
2481 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002482 }
2483 }
2484 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002485}
2486
ths5fafdf22007-09-16 21:08:06 +00002487int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002488 QEMUSnapshotInfo *sn_info)
2489{
2490 BlockDriver *drv = bs->drv;
2491 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002492 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002493 if (drv->bdrv_snapshot_create)
2494 return drv->bdrv_snapshot_create(bs, sn_info);
2495 if (bs->file)
2496 return bdrv_snapshot_create(bs->file, sn_info);
2497 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002498}
2499
ths5fafdf22007-09-16 21:08:06 +00002500int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002501 const char *snapshot_id)
2502{
2503 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002504 int ret, open_ret;
2505
bellardfaea38e2006-08-05 21:31:00 +00002506 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002507 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002508 if (drv->bdrv_snapshot_goto)
2509 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2510
2511 if (bs->file) {
2512 drv->bdrv_close(bs);
2513 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2514 open_ret = drv->bdrv_open(bs, bs->open_flags);
2515 if (open_ret < 0) {
2516 bdrv_delete(bs->file);
2517 bs->drv = NULL;
2518 return open_ret;
2519 }
2520 return ret;
2521 }
2522
2523 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002524}
2525
2526int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2527{
2528 BlockDriver *drv = bs->drv;
2529 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002530 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002531 if (drv->bdrv_snapshot_delete)
2532 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2533 if (bs->file)
2534 return bdrv_snapshot_delete(bs->file, snapshot_id);
2535 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002536}
2537
ths5fafdf22007-09-16 21:08:06 +00002538int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002539 QEMUSnapshotInfo **psn_info)
2540{
2541 BlockDriver *drv = bs->drv;
2542 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002543 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002544 if (drv->bdrv_snapshot_list)
2545 return drv->bdrv_snapshot_list(bs, psn_info);
2546 if (bs->file)
2547 return bdrv_snapshot_list(bs->file, psn_info);
2548 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002549}
2550
edison51ef6722010-09-21 19:58:41 -07002551int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2552 const char *snapshot_name)
2553{
2554 BlockDriver *drv = bs->drv;
2555 if (!drv) {
2556 return -ENOMEDIUM;
2557 }
2558 if (!bs->read_only) {
2559 return -EINVAL;
2560 }
2561 if (drv->bdrv_snapshot_load_tmp) {
2562 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2563 }
2564 return -ENOTSUP;
2565}
2566
#define NB_SUFFIXES 4

/*
 * Format a byte count into buf (at most buf_size bytes) and return buf.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and tagged K, M, G or T: one decimal place while the
 * scaled value is below 10, a rounded integer otherwise. Anything beyond
 * the T range is still expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    while (idx < NB_SUFFIXES) {
        if (size < (10 * unit)) {
            /* Small multiple of the unit: keep one fractional digit. */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            /* Round to the nearest whole unit. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
        unit = unit * 1024;
        idx++;
    }

    return buf;
}
2596
2597char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2598{
2599 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002600#ifdef _WIN32
2601 struct tm *ptm;
2602#else
bellardfaea38e2006-08-05 21:31:00 +00002603 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002604#endif
bellardfaea38e2006-08-05 21:31:00 +00002605 time_t ti;
2606 int64_t secs;
2607
2608 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002609 snprintf(buf, buf_size,
2610 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002611 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2612 } else {
2613 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002614#ifdef _WIN32
2615 ptm = localtime(&ti);
2616 strftime(date_buf, sizeof(date_buf),
2617 "%Y-%m-%d %H:%M:%S", ptm);
2618#else
bellardfaea38e2006-08-05 21:31:00 +00002619 localtime_r(&ti, &tm);
2620 strftime(date_buf, sizeof(date_buf),
2621 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002622#endif
bellardfaea38e2006-08-05 21:31:00 +00002623 secs = sn->vm_clock_nsec / 1000000000;
2624 snprintf(clock_buf, sizeof(clock_buf),
2625 "%02d:%02d:%02d.%03d",
2626 (int)(secs / 3600),
2627 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002628 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002629 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2630 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002631 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002632 sn->id_str, sn->name,
2633 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2634 date_buf,
2635 clock_buf);
2636 }
2637 return buf;
2638}
2639
bellard83f64092006-08-01 16:21:11 +00002640/**************************************************************/
2641/* async I/Os */
2642
aliguori3b69e4b2009-01-22 16:59:24 +00002643BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002644 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002645 BlockDriverCompletionFunc *cb, void *opaque)
2646{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002647 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2648
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002649 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002650 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002651}
2652
aliguorif141eaf2009-04-07 18:43:24 +00002653BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2654 QEMUIOVector *qiov, int nb_sectors,
2655 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002656{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002657 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2658
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002659 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002660 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002661}
2662
Kevin Wolf40b4f532009-09-09 17:53:37 +02002663
2664typedef struct MultiwriteCB {
2665 int error;
2666 int num_requests;
2667 int num_callbacks;
2668 struct {
2669 BlockDriverCompletionFunc *cb;
2670 void *opaque;
2671 QEMUIOVector *free_qiov;
2672 void *free_buf;
2673 } callbacks[];
2674} MultiwriteCB;
2675
2676static void multiwrite_user_cb(MultiwriteCB *mcb)
2677{
2678 int i;
2679
2680 for (i = 0; i < mcb->num_callbacks; i++) {
2681 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002682 if (mcb->callbacks[i].free_qiov) {
2683 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2684 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002685 g_free(mcb->callbacks[i].free_qiov);
Herve Poussineauf8a83242010-01-24 21:23:56 +00002686 qemu_vfree(mcb->callbacks[i].free_buf);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002687 }
2688}
2689
2690static void multiwrite_cb(void *opaque, int ret)
2691{
2692 MultiwriteCB *mcb = opaque;
2693
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002694 trace_multiwrite_cb(mcb, ret);
2695
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002696 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002697 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002698 }
2699
2700 mcb->num_requests--;
2701 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002702 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002703 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002704 }
2705}
2706
2707static int multiwrite_req_compare(const void *a, const void *b)
2708{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002709 const BlockRequest *req1 = a, *req2 = b;
2710
2711 /*
2712 * Note that we can't simply subtract req2->sector from req1->sector
2713 * here as that could overflow the return value.
2714 */
2715 if (req1->sector > req2->sector) {
2716 return 1;
2717 } else if (req1->sector < req2->sector) {
2718 return -1;
2719 } else {
2720 return 0;
2721 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002722}
2723
2724/*
2725 * Takes a bunch of requests and tries to merge them. Returns the number of
2726 * requests that remain after merging.
2727 */
2728static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2729 int num_reqs, MultiwriteCB *mcb)
2730{
2731 int i, outidx;
2732
2733 // Sort requests by start sector
2734 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2735
2736 // Check if adjacent requests touch the same clusters. If so, combine them,
2737 // filling up gaps with zero sectors.
2738 outidx = 0;
2739 for (i = 1; i < num_reqs; i++) {
2740 int merge = 0;
2741 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2742
2743 // This handles the cases that are valid for all block drivers, namely
2744 // exactly sequential writes and overlapping writes.
2745 if (reqs[i].sector <= oldreq_last) {
2746 merge = 1;
2747 }
2748
2749 // The block driver may decide that it makes sense to combine requests
2750 // even if there is a gap of some sectors between them. In this case,
2751 // the gap is filled with zeros (therefore only applicable for yet
2752 // unused space in format like qcow2).
2753 if (!merge && bs->drv->bdrv_merge_requests) {
2754 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2755 }
2756
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002757 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2758 merge = 0;
2759 }
2760
Kevin Wolf40b4f532009-09-09 17:53:37 +02002761 if (merge) {
2762 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002763 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002764 qemu_iovec_init(qiov,
2765 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2766
2767 // Add the first request to the merged one. If the requests are
2768 // overlapping, drop the last sectors of the first request.
2769 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2770 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2771
2772 // We might need to add some zeros between the two requests
2773 if (reqs[i].sector > oldreq_last) {
2774 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2775 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2776 memset(buf, 0, zero_bytes);
2777 qemu_iovec_add(qiov, buf, zero_bytes);
2778 mcb->callbacks[i].free_buf = buf;
2779 }
2780
2781 // Add the second request
2782 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2783
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002784 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002785 reqs[outidx].qiov = qiov;
2786
2787 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2788 } else {
2789 outidx++;
2790 reqs[outidx].sector = reqs[i].sector;
2791 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2792 reqs[outidx].qiov = reqs[i].qiov;
2793 }
2794 }
2795
2796 return outidx + 1;
2797}
2798
2799/*
2800 * Submit multiple AIO write requests at once.
2801 *
2802 * On success, the function returns 0 and all requests in the reqs array have
2803 * been submitted. In error case this function returns -1, and any of the
2804 * requests may or may not be submitted yet. In particular, this means that the
2805 * callback will be called for some of the requests, for others it won't. The
2806 * caller must check the error field of the BlockRequest to wait for the right
2807 * callbacks (if error != 0, no callback will be called).
2808 *
2809 * The implementation may modify the contents of the reqs array, e.g. to merge
2810 * requests. However, the fields opaque and error are left unmodified as they
2811 * are used to signal failure for a single request to the caller.
2812 */
2813int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2814{
Kevin Wolf40b4f532009-09-09 17:53:37 +02002815 MultiwriteCB *mcb;
2816 int i;
2817
Ryan Harper301db7c2011-03-07 10:01:04 -06002818 /* don't submit writes if we don't have a medium */
2819 if (bs->drv == NULL) {
2820 for (i = 0; i < num_reqs; i++) {
2821 reqs[i].error = -ENOMEDIUM;
2822 }
2823 return -1;
2824 }
2825
Kevin Wolf40b4f532009-09-09 17:53:37 +02002826 if (num_reqs == 0) {
2827 return 0;
2828 }
2829
2830 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05002831 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002832 mcb->num_requests = 0;
2833 mcb->num_callbacks = num_reqs;
2834
2835 for (i = 0; i < num_reqs; i++) {
2836 mcb->callbacks[i].cb = reqs[i].cb;
2837 mcb->callbacks[i].opaque = reqs[i].opaque;
2838 }
2839
2840 // Check for mergable requests
2841 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2842
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002843 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2844
Kevin Wolf453f9a12010-07-02 14:01:21 +02002845 /*
2846 * Run the aio requests. As soon as one request can't be submitted
2847 * successfully, fail all requests that are not yet submitted (we must
2848 * return failure for all requests anyway)
2849 *
2850 * num_requests cannot be set to the right value immediately: If
2851 * bdrv_aio_writev fails for some request, num_requests would be too high
2852 * and therefore multiwrite_cb() would never recognize the multiwrite
2853 * request as completed. We also cannot use the loop variable i to set it
2854 * when the first request fails because the callback may already have been
2855 * called for previously submitted requests. Thus, num_requests must be
2856 * incremented for each request that is submitted.
2857 *
2858 * The problem that callbacks may be called early also means that we need
2859 * to take care that num_requests doesn't become 0 before all requests are
2860 * submitted - multiwrite_cb() would consider the multiwrite request
2861 * completed. A dummy request that is "completed" by a manual call to
2862 * multiwrite_cb() takes care of this.
2863 */
2864 mcb->num_requests = 1;
2865
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002866 // Run the aio requests
Kevin Wolf40b4f532009-09-09 17:53:37 +02002867 for (i = 0; i < num_reqs; i++) {
Kevin Wolf453f9a12010-07-02 14:01:21 +02002868 mcb->num_requests++;
Paolo Bonziniad54ae82011-11-30 09:12:30 +01002869 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02002870 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002871 }
2872
Kevin Wolf453f9a12010-07-02 14:01:21 +02002873 /* Complete the dummy request */
2874 multiwrite_cb(mcb, 0);
2875
Kevin Wolf40b4f532009-09-09 17:53:37 +02002876 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002877}
2878
bellard83f64092006-08-01 16:21:11 +00002879void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00002880{
aliguori6bbff9a2009-03-20 18:25:59 +00002881 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00002882}
2883
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002884/* block I/O throttling */
2885static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2886 bool is_write, double elapsed_time, uint64_t *wait)
2887{
2888 uint64_t bps_limit = 0;
2889 double bytes_limit, bytes_base, bytes_res;
2890 double slice_time, wait_time;
2891
2892 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2893 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2894 } else if (bs->io_limits.bps[is_write]) {
2895 bps_limit = bs->io_limits.bps[is_write];
2896 } else {
2897 if (wait) {
2898 *wait = 0;
2899 }
2900
2901 return false;
2902 }
2903
2904 slice_time = bs->slice_end - bs->slice_start;
2905 slice_time /= (NANOSECONDS_PER_SECOND);
2906 bytes_limit = bps_limit * slice_time;
2907 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2908 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2909 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2910 }
2911
2912 /* bytes_base: the bytes of data which have been read/written; and
2913 * it is obtained from the history statistic info.
2914 * bytes_res: the remaining bytes of data which need to be read/written.
2915 * (bytes_base + bytes_res) / bps_limit: used to calcuate
2916 * the total time for completing reading/writting all data.
2917 */
2918 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2919
2920 if (bytes_base + bytes_res <= bytes_limit) {
2921 if (wait) {
2922 *wait = 0;
2923 }
2924
2925 return false;
2926 }
2927
2928 /* Calc approx time to dispatch */
2929 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2930
2931 /* When the I/O rate at runtime exceeds the limits,
2932 * bs->slice_end need to be extended in order that the current statistic
2933 * info can be kept until the timer fire, so it is increased and tuned
2934 * based on the result of experiment.
2935 */
2936 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2937 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2938 if (wait) {
2939 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2940 }
2941
2942 return true;
2943}
2944
2945static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2946 double elapsed_time, uint64_t *wait)
2947{
2948 uint64_t iops_limit = 0;
2949 double ios_limit, ios_base;
2950 double slice_time, wait_time;
2951
2952 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2953 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2954 } else if (bs->io_limits.iops[is_write]) {
2955 iops_limit = bs->io_limits.iops[is_write];
2956 } else {
2957 if (wait) {
2958 *wait = 0;
2959 }
2960
2961 return false;
2962 }
2963
2964 slice_time = bs->slice_end - bs->slice_start;
2965 slice_time /= (NANOSECONDS_PER_SECOND);
2966 ios_limit = iops_limit * slice_time;
2967 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2968 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2969 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2970 }
2971
2972 if (ios_base + 1 <= ios_limit) {
2973 if (wait) {
2974 *wait = 0;
2975 }
2976
2977 return false;
2978 }
2979
2980 /* Calc approx time to dispatch */
2981 wait_time = (ios_base + 1) / iops_limit;
2982 if (wait_time > elapsed_time) {
2983 wait_time = wait_time - elapsed_time;
2984 } else {
2985 wait_time = 0;
2986 }
2987
2988 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2989 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2990 if (wait) {
2991 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2992 }
2993
2994 return true;
2995}
2996
2997static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
2998 bool is_write, int64_t *wait)
2999{
3000 int64_t now, max_wait;
3001 uint64_t bps_wait = 0, iops_wait = 0;
3002 double elapsed_time;
3003 int bps_ret, iops_ret;
3004
3005 now = qemu_get_clock_ns(vm_clock);
3006 if ((bs->slice_start < now)
3007 && (bs->slice_end > now)) {
3008 bs->slice_end = now + bs->slice_time;
3009 } else {
3010 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3011 bs->slice_start = now;
3012 bs->slice_end = now + bs->slice_time;
3013
3014 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3015 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3016
3017 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3018 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3019 }
3020
3021 elapsed_time = now - bs->slice_start;
3022 elapsed_time /= (NANOSECONDS_PER_SECOND);
3023
3024 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3025 is_write, elapsed_time, &bps_wait);
3026 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3027 elapsed_time, &iops_wait);
3028 if (bps_ret || iops_ret) {
3029 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3030 if (wait) {
3031 *wait = max_wait;
3032 }
3033
3034 now = qemu_get_clock_ns(vm_clock);
3035 if (bs->slice_end < now + max_wait) {
3036 bs->slice_end = now + max_wait;
3037 }
3038
3039 return true;
3040 }
3041
3042 if (wait) {
3043 *wait = 0;
3044 }
3045
3046 return false;
3047}
pbrookce1a14d2006-08-07 02:38:06 +00003048
bellard83f64092006-08-01 16:21:11 +00003049/**************************************************************/
3050/* async block device emulation */
3051
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003052typedef struct BlockDriverAIOCBSync {
3053 BlockDriverAIOCB common;
3054 QEMUBH *bh;
3055 int ret;
3056 /* vector translation state */
3057 QEMUIOVector *qiov;
3058 uint8_t *bounce;
3059 int is_write;
3060} BlockDriverAIOCBSync;
3061
3062static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3063{
Kevin Wolfb666d232010-05-05 11:44:39 +02003064 BlockDriverAIOCBSync *acb =
3065 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003066 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003067 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003068 qemu_aio_release(acb);
3069}
3070
3071static AIOPool bdrv_em_aio_pool = {
3072 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3073 .cancel = bdrv_aio_cancel_em,
3074};
3075
bellard83f64092006-08-01 16:21:11 +00003076static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003077{
pbrookce1a14d2006-08-07 02:38:06 +00003078 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003079
aliguorif141eaf2009-04-07 18:43:24 +00003080 if (!acb->is_write)
3081 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003082 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003083 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003084 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003085 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003086 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003087}
bellardbeac80c2006-06-26 20:08:57 +00003088
aliguorif141eaf2009-04-07 18:43:24 +00003089static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3090 int64_t sector_num,
3091 QEMUIOVector *qiov,
3092 int nb_sectors,
3093 BlockDriverCompletionFunc *cb,
3094 void *opaque,
3095 int is_write)
3096
bellardea2384d2004-08-01 21:59:26 +00003097{
pbrookce1a14d2006-08-07 02:38:06 +00003098 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003099
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003100 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003101 acb->is_write = is_write;
3102 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003103 acb->bounce = qemu_blockalign(bs, qiov->size);
aliguorif141eaf2009-04-07 18:43:24 +00003104
pbrookce1a14d2006-08-07 02:38:06 +00003105 if (!acb->bh)
3106 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003107
3108 if (is_write) {
3109 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003110 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003111 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003112 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003113 }
3114
pbrookce1a14d2006-08-07 02:38:06 +00003115 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003116
pbrookce1a14d2006-08-07 02:38:06 +00003117 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003118}
3119
aliguorif141eaf2009-04-07 18:43:24 +00003120static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3121 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003122 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003123{
aliguorif141eaf2009-04-07 18:43:24 +00003124 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003125}
3126
aliguorif141eaf2009-04-07 18:43:24 +00003127static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3128 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3129 BlockDriverCompletionFunc *cb, void *opaque)
3130{
3131 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3132}
3133
Kevin Wolf68485422011-06-30 10:05:46 +02003134
3135typedef struct BlockDriverAIOCBCoroutine {
3136 BlockDriverAIOCB common;
3137 BlockRequest req;
3138 bool is_write;
3139 QEMUBH* bh;
3140} BlockDriverAIOCBCoroutine;
3141
3142static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3143{
3144 qemu_aio_flush();
3145}
3146
3147static AIOPool bdrv_em_co_aio_pool = {
3148 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3149 .cancel = bdrv_aio_co_cancel_em,
3150};
3151
Paolo Bonzini35246a62011-10-14 10:41:29 +02003152static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003153{
3154 BlockDriverAIOCBCoroutine *acb = opaque;
3155
3156 acb->common.cb(acb->common.opaque, acb->req.error);
3157 qemu_bh_delete(acb->bh);
3158 qemu_aio_release(acb);
3159}
3160
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003161/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3162static void coroutine_fn bdrv_co_do_rw(void *opaque)
3163{
3164 BlockDriverAIOCBCoroutine *acb = opaque;
3165 BlockDriverState *bs = acb->common.bs;
3166
3167 if (!acb->is_write) {
3168 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3169 acb->req.nb_sectors, acb->req.qiov);
3170 } else {
3171 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3172 acb->req.nb_sectors, acb->req.qiov);
3173 }
3174
Paolo Bonzini35246a62011-10-14 10:41:29 +02003175 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003176 qemu_bh_schedule(acb->bh);
3177}
3178
Kevin Wolf68485422011-06-30 10:05:46 +02003179static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3180 int64_t sector_num,
3181 QEMUIOVector *qiov,
3182 int nb_sectors,
3183 BlockDriverCompletionFunc *cb,
3184 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003185 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003186{
3187 Coroutine *co;
3188 BlockDriverAIOCBCoroutine *acb;
3189
3190 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3191 acb->req.sector = sector_num;
3192 acb->req.nb_sectors = nb_sectors;
3193 acb->req.qiov = qiov;
3194 acb->is_write = is_write;
3195
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003196 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003197 qemu_coroutine_enter(co, acb);
3198
3199 return &acb->common;
3200}
3201
Paolo Bonzini07f07612011-10-17 12:32:12 +02003202static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003203{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003204 BlockDriverAIOCBCoroutine *acb = opaque;
3205 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003206
Paolo Bonzini07f07612011-10-17 12:32:12 +02003207 acb->req.error = bdrv_co_flush(bs);
3208 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003209 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003210}
3211
Paolo Bonzini07f07612011-10-17 12:32:12 +02003212BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003213 BlockDriverCompletionFunc *cb, void *opaque)
3214{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003215 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003216
Paolo Bonzini07f07612011-10-17 12:32:12 +02003217 Coroutine *co;
3218 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003219
Paolo Bonzini07f07612011-10-17 12:32:12 +02003220 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3221 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3222 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003223
Alexander Graf016f5cf2010-05-26 17:51:49 +02003224 return &acb->common;
3225}
3226
Paolo Bonzini4265d622011-10-17 12:32:14 +02003227static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3228{
3229 BlockDriverAIOCBCoroutine *acb = opaque;
3230 BlockDriverState *bs = acb->common.bs;
3231
3232 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3233 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3234 qemu_bh_schedule(acb->bh);
3235}
3236
3237BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3238 int64_t sector_num, int nb_sectors,
3239 BlockDriverCompletionFunc *cb, void *opaque)
3240{
3241 Coroutine *co;
3242 BlockDriverAIOCBCoroutine *acb;
3243
3244 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3245
3246 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3247 acb->req.sector = sector_num;
3248 acb->req.nb_sectors = nb_sectors;
3249 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3250 qemu_coroutine_enter(co, acb);
3251
3252 return &acb->common;
3253}
3254
bellardea2384d2004-08-01 21:59:26 +00003255void bdrv_init(void)
3256{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003257 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003258}
pbrookce1a14d2006-08-07 02:38:06 +00003259
Markus Armbrustereb852012009-10-27 18:41:44 +01003260void bdrv_init_with_whitelist(void)
3261{
3262 use_bdrv_whitelist = 1;
3263 bdrv_init();
3264}
3265
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003266void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3267 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003268{
pbrookce1a14d2006-08-07 02:38:06 +00003269 BlockDriverAIOCB *acb;
3270
aliguori6bbff9a2009-03-20 18:25:59 +00003271 if (pool->free_aiocb) {
3272 acb = pool->free_aiocb;
3273 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003274 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003275 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003276 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003277 }
3278 acb->bs = bs;
3279 acb->cb = cb;
3280 acb->opaque = opaque;
3281 return acb;
3282}
3283
3284void qemu_aio_release(void *p)
3285{
aliguori6bbff9a2009-03-20 18:25:59 +00003286 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3287 AIOPool *pool = acb->pool;
3288 acb->next = pool->free_aiocb;
3289 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003290}
bellard19cb3732006-08-19 11:45:59 +00003291
3292/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003293/* Coroutine block device emulation */
3294
3295typedef struct CoroutineIOCompletion {
3296 Coroutine *coroutine;
3297 int ret;
3298} CoroutineIOCompletion;
3299
3300static void bdrv_co_io_em_complete(void *opaque, int ret)
3301{
3302 CoroutineIOCompletion *co = opaque;
3303
3304 co->ret = ret;
3305 qemu_coroutine_enter(co->coroutine, NULL);
3306}
3307
3308static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3309 int nb_sectors, QEMUIOVector *iov,
3310 bool is_write)
3311{
3312 CoroutineIOCompletion co = {
3313 .coroutine = qemu_coroutine_self(),
3314 };
3315 BlockDriverAIOCB *acb;
3316
3317 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003318 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3319 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003320 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003321 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3322 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003323 }
3324
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003325 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003326 if (!acb) {
3327 return -EIO;
3328 }
3329 qemu_coroutine_yield();
3330
3331 return co.ret;
3332}
3333
3334static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3335 int64_t sector_num, int nb_sectors,
3336 QEMUIOVector *iov)
3337{
3338 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3339}
3340
3341static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3342 int64_t sector_num, int nb_sectors,
3343 QEMUIOVector *iov)
3344{
3345 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3346}
3347
Paolo Bonzini07f07612011-10-17 12:32:12 +02003348static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003349{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003350 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003351
Paolo Bonzini07f07612011-10-17 12:32:12 +02003352 rwco->ret = bdrv_co_flush(rwco->bs);
3353}
3354
3355int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3356{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003357 int ret;
3358
Kevin Wolfca716362011-11-10 18:13:59 +01003359 if (!bs->drv) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003360 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003361 }
3362
Kevin Wolfca716362011-11-10 18:13:59 +01003363 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003364 if (bs->drv->bdrv_co_flush_to_os) {
3365 ret = bs->drv->bdrv_co_flush_to_os(bs);
3366 if (ret < 0) {
3367 return ret;
3368 }
3369 }
3370
Kevin Wolfca716362011-11-10 18:13:59 +01003371 /* But don't actually force it to the disk with cache=unsafe */
3372 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3373 return 0;
3374 }
3375
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003376 if (bs->drv->bdrv_co_flush_to_disk) {
Kevin Wolfc68b89a2011-11-10 17:25:44 +01003377 return bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003378 } else if (bs->drv->bdrv_aio_flush) {
3379 BlockDriverAIOCB *acb;
3380 CoroutineIOCompletion co = {
3381 .coroutine = qemu_coroutine_self(),
3382 };
3383
3384 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3385 if (acb == NULL) {
3386 return -EIO;
3387 } else {
3388 qemu_coroutine_yield();
3389 return co.ret;
3390 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003391 } else {
3392 /*
3393 * Some block drivers always operate in either writethrough or unsafe
3394 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3395 * know how the server works (because the behaviour is hardcoded or
3396 * depends on server-side configuration), so we can't ensure that
3397 * everything is safe on disk. Returning an error doesn't work because
3398 * that would break guests even if the server operates in writethrough
3399 * mode.
3400 *
3401 * Let's hope the user knows what he's doing.
3402 */
3403 return 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003404 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003405}
3406
Anthony Liguori0f154232011-11-14 15:09:45 -06003407void bdrv_invalidate_cache(BlockDriverState *bs)
3408{
3409 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3410 bs->drv->bdrv_invalidate_cache(bs);
3411 }
3412}
3413
3414void bdrv_invalidate_cache_all(void)
3415{
3416 BlockDriverState *bs;
3417
3418 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3419 bdrv_invalidate_cache(bs);
3420 }
3421}
3422
Paolo Bonzini07f07612011-10-17 12:32:12 +02003423int bdrv_flush(BlockDriverState *bs)
3424{
3425 Coroutine *co;
3426 RwCo rwco = {
3427 .bs = bs,
3428 .ret = NOT_DONE,
3429 };
3430
3431 if (qemu_in_coroutine()) {
3432 /* Fast-path if already in coroutine context */
3433 bdrv_flush_co_entry(&rwco);
3434 } else {
3435 co = qemu_coroutine_create(bdrv_flush_co_entry);
3436 qemu_coroutine_enter(co, &rwco);
3437 while (rwco.ret == NOT_DONE) {
3438 qemu_aio_wait();
3439 }
3440 }
3441
3442 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003443}
3444
Paolo Bonzini4265d622011-10-17 12:32:14 +02003445static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3446{
3447 RwCo *rwco = opaque;
3448
3449 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3450}
3451
3452int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3453 int nb_sectors)
3454{
3455 if (!bs->drv) {
3456 return -ENOMEDIUM;
3457 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3458 return -EIO;
3459 } else if (bs->read_only) {
3460 return -EROFS;
3461 } else if (bs->drv->bdrv_co_discard) {
3462 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3463 } else if (bs->drv->bdrv_aio_discard) {
3464 BlockDriverAIOCB *acb;
3465 CoroutineIOCompletion co = {
3466 .coroutine = qemu_coroutine_self(),
3467 };
3468
3469 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3470 bdrv_co_io_em_complete, &co);
3471 if (acb == NULL) {
3472 return -EIO;
3473 } else {
3474 qemu_coroutine_yield();
3475 return co.ret;
3476 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003477 } else {
3478 return 0;
3479 }
3480}
3481
3482int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3483{
3484 Coroutine *co;
3485 RwCo rwco = {
3486 .bs = bs,
3487 .sector_num = sector_num,
3488 .nb_sectors = nb_sectors,
3489 .ret = NOT_DONE,
3490 };
3491
3492 if (qemu_in_coroutine()) {
3493 /* Fast-path if already in coroutine context */
3494 bdrv_discard_co_entry(&rwco);
3495 } else {
3496 co = qemu_coroutine_create(bdrv_discard_co_entry);
3497 qemu_coroutine_enter(co, &rwco);
3498 while (rwco.ret == NOT_DONE) {
3499 qemu_aio_wait();
3500 }
3501 }
3502
3503 return rwco.ret;
3504}
3505
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003506/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003507/* removable device support */
3508
3509/**
3510 * Return TRUE if the media is present
3511 */
3512int bdrv_is_inserted(BlockDriverState *bs)
3513{
3514 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003515
bellard19cb3732006-08-19 11:45:59 +00003516 if (!drv)
3517 return 0;
3518 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003519 return 1;
3520 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003521}
3522
3523/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003524 * Return whether the media changed since the last call to this
3525 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003526 */
3527int bdrv_media_changed(BlockDriverState *bs)
3528{
3529 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003530
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003531 if (drv && drv->bdrv_media_changed) {
3532 return drv->bdrv_media_changed(bs);
3533 }
3534 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003535}
3536
3537/**
3538 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3539 */
Markus Armbrusterfdec4402011-09-06 18:58:45 +02003540void bdrv_eject(BlockDriverState *bs, int eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003541{
3542 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003543
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003544 if (drv && drv->bdrv_eject) {
3545 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003546 }
bellard19cb3732006-08-19 11:45:59 +00003547}
3548
bellard19cb3732006-08-19 11:45:59 +00003549/**
3550 * Lock or unlock the media (if it is locked, the user won't be able
3551 * to eject it manually).
3552 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003553void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003554{
3555 BlockDriver *drv = bs->drv;
3556
Markus Armbruster025e8492011-09-06 18:58:47 +02003557 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003558
Markus Armbruster025e8492011-09-06 18:58:47 +02003559 if (drv && drv->bdrv_lock_medium) {
3560 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003561 }
3562}
ths985a03b2007-12-24 16:10:43 +00003563
3564/* needed for generic scsi interface */
3565
3566int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3567{
3568 BlockDriver *drv = bs->drv;
3569
3570 if (drv && drv->bdrv_ioctl)
3571 return drv->bdrv_ioctl(bs, req, buf);
3572 return -ENOTSUP;
3573}
aliguori7d780662009-03-12 19:57:08 +00003574
aliguori221f7152009-03-28 17:28:41 +00003575BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3576 unsigned long int req, void *buf,
3577 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003578{
aliguori221f7152009-03-28 17:28:41 +00003579 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003580
aliguori221f7152009-03-28 17:28:41 +00003581 if (drv && drv->bdrv_aio_ioctl)
3582 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3583 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003584}
aliguorie268ca52009-04-22 20:20:00 +00003585
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003586void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3587{
3588 bs->buffer_alignment = align;
3589}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003590
aliguorie268ca52009-04-22 20:20:00 +00003591void *qemu_blockalign(BlockDriverState *bs, size_t size)
3592{
3593 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3594}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003595
3596void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3597{
3598 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003599
Liran Schouraaa0eb72010-01-26 10:31:48 +02003600 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003601 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003602 if (!bs->dirty_bitmap) {
3603 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3604 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3605 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003606
Anthony Liguori7267c092011-08-20 22:09:37 -05003607 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003608 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003609 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003610 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003611 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003612 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003613 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003614 }
3615}
3616
3617int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3618{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003619 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003620
Jan Kiszkac6d22832009-11-30 18:21:20 +01003621 if (bs->dirty_bitmap &&
3622 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003623 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3624 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003625 } else {
3626 return 0;
3627 }
3628}
3629
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003630void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3631 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003632{
3633 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3634}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003635
3636int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3637{
3638 return bs->dirty_count;
3639}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003640
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003641void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3642{
3643 assert(bs->in_use != in_use);
3644 bs->in_use = in_use;
3645}
3646
3647int bdrv_in_use(BlockDriverState *bs)
3648{
3649 return bs->in_use;
3650}
3651
Luiz Capitulino28a72822011-09-26 17:43:50 -03003652void bdrv_iostatus_enable(BlockDriverState *bs)
3653{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003654 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003655 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003656}
3657
3658/* The I/O status is only enabled if the drive explicitly
3659 * enables it _and_ the VM is configured to stop on errors */
3660bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3661{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003662 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003663 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3664 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3665 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3666}
3667
3668void bdrv_iostatus_disable(BlockDriverState *bs)
3669{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003670 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003671}
3672
3673void bdrv_iostatus_reset(BlockDriverState *bs)
3674{
3675 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003676 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003677 }
3678}
3679
3680/* XXX: Today this is set by device models because it makes the implementation
3681 quite simple. However, the block layer knows about the error, so it's
3682 possible to implement this without device models being involved */
3683void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3684{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003685 if (bdrv_iostatus_is_enabled(bs) &&
3686 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003687 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003688 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3689 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003690 }
3691}
3692
Christoph Hellwiga597e792011-08-25 08:26:01 +02003693void
3694bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3695 enum BlockAcctType type)
3696{
3697 assert(type < BDRV_MAX_IOTYPE);
3698
3699 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003700 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003701 cookie->type = type;
3702}
3703
3704void
3705bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3706{
3707 assert(cookie->type < BDRV_MAX_IOTYPE);
3708
3709 bs->nr_bytes[cookie->type] += cookie->bytes;
3710 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003711 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003712}
3713
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003714int bdrv_img_create(const char *filename, const char *fmt,
3715 const char *base_filename, const char *base_fmt,
3716 char *options, uint64_t img_size, int flags)
3717{
3718 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003719 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003720 BlockDriverState *bs = NULL;
3721 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003722 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003723 int ret = 0;
3724
3725 /* Find driver and parse its options */
3726 drv = bdrv_find_format(fmt);
3727 if (!drv) {
3728 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003729 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003730 goto out;
3731 }
3732
3733 proto_drv = bdrv_find_protocol(filename);
3734 if (!proto_drv) {
3735 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003736 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003737 goto out;
3738 }
3739
3740 create_options = append_option_parameters(create_options,
3741 drv->create_options);
3742 create_options = append_option_parameters(create_options,
3743 proto_drv->create_options);
3744
3745 /* Create parameter list with default values */
3746 param = parse_option_parameters("", create_options, param);
3747
3748 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3749
3750 /* Parse -o options */
3751 if (options) {
3752 param = parse_option_parameters(options, create_options, param);
3753 if (param == NULL) {
3754 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003755 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003756 goto out;
3757 }
3758 }
3759
3760 if (base_filename) {
3761 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3762 base_filename)) {
3763 error_report("Backing file not supported for file format '%s'",
3764 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003765 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003766 goto out;
3767 }
3768 }
3769
3770 if (base_fmt) {
3771 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3772 error_report("Backing file format not supported for file "
3773 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003774 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003775 goto out;
3776 }
3777 }
3778
Jes Sorensen792da932010-12-16 13:52:17 +01003779 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3780 if (backing_file && backing_file->value.s) {
3781 if (!strcmp(filename, backing_file->value.s)) {
3782 error_report("Error: Trying to create an image with the "
3783 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003784 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003785 goto out;
3786 }
3787 }
3788
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003789 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3790 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003791 backing_drv = bdrv_find_format(backing_fmt->value.s);
3792 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003793 error_report("Unknown backing file format '%s'",
3794 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003795 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003796 goto out;
3797 }
3798 }
3799
3800 // The size for the image must always be specified, with one exception:
3801 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02003802 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3803 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003804 if (backing_file && backing_file->value.s) {
3805 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003806 char buf[32];
3807
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003808 bs = bdrv_new("");
3809
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003810 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003811 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003812 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003813 goto out;
3814 }
3815 bdrv_get_geometry(bs, &size);
3816 size *= 512;
3817
3818 snprintf(buf, sizeof(buf), "%" PRId64, size);
3819 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3820 } else {
3821 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003822 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003823 goto out;
3824 }
3825 }
3826
3827 printf("Formatting '%s', fmt=%s ", filename, fmt);
3828 print_option_parameters(param);
3829 puts("");
3830
3831 ret = bdrv_create(drv, filename, param);
3832
3833 if (ret < 0) {
3834 if (ret == -ENOTSUP) {
3835 error_report("Formatting or formatting option not supported for "
3836 "file format '%s'", fmt);
3837 } else if (ret == -EFBIG) {
3838 error_report("The image size is too large for file format '%s'",
3839 fmt);
3840 } else {
3841 error_report("%s: error while creating %s: %s", filename, fmt,
3842 strerror(-ret));
3843 }
3844 }
3845
3846out:
3847 free_option_parameters(create_options);
3848 free_option_parameters(param);
3849
3850 if (bs) {
3851 bdrv_delete(bs);
3852 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01003853
3854 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003855}