blob: dd5d5ca4d2a2438acd16a0f1b43a4d0c67aa879a [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/*
 * Forward declarations for helpers that are referenced in this file before
 * their definitions.
 */

/* In the code below this is called with load=true after an image has been
 * opened without needing a key, and with load=false when an image is
 * closed. */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);

/* AIO emulation entry points; bdrv_register() installs them for drivers that
 * provide neither coroutine nor AIO read/write callbacks. */
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);

/* Coroutine emulation entry points; bdrv_register() installs them for
 * drivers without a bdrv_co_readv callback. */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);

/* Coroutine-based request helpers defined later in the file. */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);

/* I/O throttling predicates. bdrv_io_limits_intercept() polls
 * bdrv_exceed_io_limits() and arms a timer with the wait value it reports.
 * NOTE(review): the bps/iops helpers take a uint64_t *wait while the
 * combined check takes an int64_t *wait -- confirm this is intentional. */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);
83
/* Block device states created with a non-empty device name; bdrv_new()
 * appends to the tail of this list. */
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

/* Registered block drivers; bdrv_register() inserts at the head, and the
 * format/protocol/probe lookups iterate over it. */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots (cleared by bdrv_close() when that
 * device is closed) */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
95
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +000096#ifdef _WIN32
/*
 * Return non-zero when filename starts with an ASCII letter immediately
 * followed by ':' (for example "c:" or "D:\\dir"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    char drive = filename[0];
    int is_letter = (drive >= 'a' && drive <= 'z') ||
                    (drive >= 'A' && drive <= 'Z');

    if (!is_letter) {
        /* filename[1] is deliberately not inspected in this case */
        return 0;
    }
    return filename[1] == ':';
}
103
104int is_windows_drive(const char *filename)
105{
106 if (is_windows_drive_prefix(filename) &&
107 filename[2] == '\0')
108 return 1;
109 if (strstart(filename, "\\\\.\\", NULL) ||
110 strstart(filename, "//./", NULL))
111 return 1;
112 return 0;
113}
114#endif
115
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800116/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800117void bdrv_io_limits_disable(BlockDriverState *bs)
118{
119 bs->io_limits_enabled = false;
120
121 while (qemu_co_queue_next(&bs->throttled_reqs));
122
123 if (bs->block_timer) {
124 qemu_del_timer(bs->block_timer);
125 qemu_free_timer(bs->block_timer);
126 bs->block_timer = NULL;
127 }
128
129 bs->slice_start = 0;
130 bs->slice_end = 0;
131 bs->slice_time = 0;
132 memset(&bs->io_base, 0, sizeof(bs->io_base));
133}
134
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800135static void bdrv_block_timer(void *opaque)
136{
137 BlockDriverState *bs = opaque;
138
139 qemu_co_queue_next(&bs->throttled_reqs);
140}
141
142void bdrv_io_limits_enable(BlockDriverState *bs)
143{
144 qemu_co_queue_init(&bs->throttled_reqs);
145 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
146 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
147 bs->slice_start = qemu_get_clock_ns(vm_clock);
148 bs->slice_end = bs->slice_start + bs->slice_time;
149 memset(&bs->io_base, 0, sizeof(bs->io_base));
150 bs->io_limits_enabled = true;
151}
152
153bool bdrv_io_limits_enabled(BlockDriverState *bs)
154{
155 BlockIOLimit *io_limits = &bs->io_limits;
156 return io_limits->bps[BLOCK_IO_LIMIT_READ]
157 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
158 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
159 || io_limits->iops[BLOCK_IO_LIMIT_READ]
160 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
161 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
162}
163
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800164static void bdrv_io_limits_intercept(BlockDriverState *bs,
165 bool is_write, int nb_sectors)
166{
167 int64_t wait_time = -1;
168
169 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
170 qemu_co_queue_wait(&bs->throttled_reqs);
171 }
172
173 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
174 * throttled requests will not be dequeued until the current request is
175 * allowed to be serviced. So if the current request still exceeds the
176 * limits, it will be inserted to the head. All requests followed it will
177 * be still in throttled_reqs queue.
178 */
179
180 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
181 qemu_mod_timer(bs->block_timer,
182 wait_time + qemu_get_clock_ns(vm_clock));
183 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
184 }
185
186 qemu_co_queue_next(&bs->throttled_reqs);
187}
188
/*
 * Check whether path starts with "<protocol>:".
 *
 * Returns 1 when path contains a ':' anywhere, 0 otherwise. On Windows,
 * drive specifications and paths with a drive-letter prefix are never
 * treated as carrying a protocol.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    if (strchr(path, ':') == NULL) {
        return 0;
    }
    return 1;
}
201
/*
 * Return non-zero when path is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped, and the
 * path counts as absolute when the next character is '/'. On Windows '\\'
 * is accepted as well, and a path that itself begins with a separator
 * (e.g. "\\.\d:") is absolute regardless of any colon.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
221
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path of filename interpreted relative to base_path.
 *
 * If filename is absolute it is simply copied. Otherwise the leading part of
 * base_path -- up to and including its last directory separator, or its
 * "<protocol>:" prefix, whichever extends further -- is kept and filename is
 * appended to it. URLs are supported through the protocol-prefix handling.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto;
    const char *after_sep;
    const char *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first character after "<protocol>:", or the start of base_path */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* first character after the last directory separator, if any */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix reaches further into base_path */
    cut = (after_sep > after_proto) ? after_sep : after_proto;

    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
265
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500266void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000267{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100268 /* Block drivers without coroutine functions need emulation */
269 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200270 bdrv->bdrv_co_readv = bdrv_co_readv_em;
271 bdrv->bdrv_co_writev = bdrv_co_writev_em;
272
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100273 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
274 * the block driver lacks aio we need to emulate that too.
275 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200276 if (!bdrv->bdrv_aio_readv) {
277 /* add AIO emulation layer */
278 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
279 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200280 }
bellard83f64092006-08-01 16:21:11 +0000281 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200282
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100283 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000284}
bellardb3380822004-03-14 21:38:54 +0000285
286/* create a new block device (by default it is empty) */
287BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000288{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100289 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000290
Anthony Liguori7267c092011-08-20 22:09:37 -0500291 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000292 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000293 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100294 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000295 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300296 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000297 return bs;
298}
299
bellardea2384d2004-08-01 21:59:26 +0000300BlockDriver *bdrv_find_format(const char *format_name)
301{
302 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100303 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
304 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000305 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100306 }
bellardea2384d2004-08-01 21:59:26 +0000307 }
308 return NULL;
309}
310
Markus Armbrustereb852012009-10-27 18:41:44 +0100311static int bdrv_is_whitelisted(BlockDriver *drv)
312{
313 static const char *whitelist[] = {
314 CONFIG_BDRV_WHITELIST
315 };
316 const char **p;
317
318 if (!whitelist[0])
319 return 1; /* no whitelist, anything goes */
320
321 for (p = whitelist; *p; p++) {
322 if (!strcmp(drv->format_name, *p)) {
323 return 1;
324 }
325 }
326 return 0;
327}
328
329BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
330{
331 BlockDriver *drv = bdrv_find_format(format_name);
332 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
333}
334
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200335int bdrv_create(BlockDriver *drv, const char* filename,
336 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000337{
338 if (!drv->bdrv_create)
339 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200340
341 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000342}
343
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200344int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
345{
346 BlockDriver *drv;
347
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900348 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200349 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000350 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351 }
352
353 return bdrv_create(drv, filename, options);
354}
355
#ifdef _WIN32
/*
 * Create a uniquely named temporary file and store its path in filename.
 *
 * filename: output buffer
 * size:     capacity of filename in bytes; nothing is written if size <= 0
 *
 * GetTempFileName() always needs room for MAX_PATH characters, so the name
 * is built in a local buffer and at most size bytes (including the
 * terminating NUL) are copied out. If the temporary directory or the file
 * name cannot be obtained, filename is set to the empty string.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    char temp_name[MAX_PATH];

    if (size <= 0) {
        return;
    }

    if (!GetTempPath(MAX_PATH, temp_dir) ||
        !GetTempFileName(temp_dir, "qem", 0, temp_name)) {
        filename[0] = '\0';
        return;
    }

    pstrcpy(filename, size, temp_name);
}
#else
/*
 * Create a uniquely named temporary file and store its path in filename.
 *
 * filename: output buffer
 * size:     capacity of filename in bytes; nothing is written if size <= 0
 *
 * The file is created with mkstemp() inside $TMPDIR (or /tmp when TMPDIR is
 * unset) and its descriptor is closed again; only the name is handed back.
 * If the file cannot be created, filename is set to the empty string.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    if (fd < 0) {
        /* covers a truncated template too: mkstemp() rejects it */
        filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
bellardea2384d2004-08-01 21:59:26 +0000378
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200379/*
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
382 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200383static BlockDriver *find_hdev_driver(const char *filename)
384{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200385 int score_max = 0, score;
386 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200387
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100388 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200389 if (d->bdrv_probe_device) {
390 score = d->bdrv_probe_device(filename);
391 if (score > score_max) {
392 score_max = score;
393 drv = d;
394 }
395 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200396 }
397
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200398 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200399}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200400
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900401BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200402{
403 BlockDriver *drv1;
404 char protocol[128];
405 int len;
406 const char *p;
407
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200408 /* TODO Drivers without bdrv_file_open must be specified explicitly */
409
Christoph Hellwig39508e72010-06-23 12:25:17 +0200410 /*
411 * XXX(hch): we really should not let host device detection
412 * override an explicit protocol specification, but moving this
413 * later breaks access to device names with colons in them.
414 * Thanks to the brain-dead persistent naming schemes on udev-
415 * based Linux systems those actually are quite common.
416 */
417 drv1 = find_hdev_driver(filename);
418 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200419 return drv1;
420 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200421
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000422 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200423 return bdrv_find_format("file");
424 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000425 p = strchr(filename, ':');
426 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200427 len = p - filename;
428 if (len > sizeof(protocol) - 1)
429 len = sizeof(protocol) - 1;
430 memcpy(protocol, filename, len);
431 protocol[len] = '\0';
432 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
433 if (drv1->protocol_name &&
434 !strcmp(drv1->protocol_name, protocol)) {
435 return drv1;
436 }
437 }
438 return NULL;
439}
440
Stefan Weilc98ac352010-07-21 21:51:51 +0200441static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000442{
bellard83f64092006-08-01 16:21:11 +0000443 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000444 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000445 uint8_t buf[2048];
446 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000447
Naphtali Spreif5edb012010-01-17 16:48:13 +0200448 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200449 if (ret < 0) {
450 *pdrv = NULL;
451 return ret;
452 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700453
Kevin Wolf08a00552010-06-01 18:37:31 +0200454 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
455 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700456 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200457 drv = bdrv_find_format("raw");
458 if (!drv) {
459 ret = -ENOENT;
460 }
461 *pdrv = drv;
462 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700464
bellard83f64092006-08-01 16:21:11 +0000465 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
466 bdrv_delete(bs);
467 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200468 *pdrv = NULL;
469 return ret;
bellard83f64092006-08-01 16:21:11 +0000470 }
471
bellardea2384d2004-08-01 21:59:26 +0000472 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200473 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100474 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000475 if (drv1->bdrv_probe) {
476 score = drv1->bdrv_probe(buf, ret, filename);
477 if (score > score_max) {
478 score_max = score;
479 drv = drv1;
480 }
bellardea2384d2004-08-01 21:59:26 +0000481 }
482 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200483 if (!drv) {
484 ret = -ENOENT;
485 }
486 *pdrv = drv;
487 return ret;
bellardea2384d2004-08-01 21:59:26 +0000488}
489
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100490/**
491 * Set the current 'total_sectors' value
492 */
493static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
494{
495 BlockDriver *drv = bs->drv;
496
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700497 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
498 if (bs->sg)
499 return 0;
500
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100501 /* query actual device if possible, otherwise just trust the hint */
502 if (drv->bdrv_getlength) {
503 int64_t length = drv->bdrv_getlength(bs);
504 if (length < 0) {
505 return length;
506 }
507 hint = length >> BDRV_SECTOR_BITS;
508 }
509
510 bs->total_sectors = hint;
511 return 0;
512}
513
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100514/**
515 * Set open flags for a given cache mode
516 *
517 * Return 0 on success, -1 if the cache mode was invalid.
518 */
519int bdrv_parse_cache_flags(const char *mode, int *flags)
520{
521 *flags &= ~BDRV_O_CACHE_MASK;
522
523 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
524 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100525 } else if (!strcmp(mode, "directsync")) {
526 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100527 } else if (!strcmp(mode, "writeback")) {
528 *flags |= BDRV_O_CACHE_WB;
529 } else if (!strcmp(mode, "unsafe")) {
530 *flags |= BDRV_O_CACHE_WB;
531 *flags |= BDRV_O_NO_FLUSH;
532 } else if (!strcmp(mode, "writethrough")) {
533 /* this is the default */
534 } else {
535 return -1;
536 }
537
538 return 0;
539}
540
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000541/**
542 * The copy-on-read flag is actually a reference count so multiple users may
543 * use the feature without worrying about clobbering its previous state.
544 * Copy-on-read stays enabled until all users have called to disable it.
545 */
546void bdrv_enable_copy_on_read(BlockDriverState *bs)
547{
548 bs->copy_on_read++;
549}
550
551void bdrv_disable_copy_on_read(BlockDriverState *bs)
552{
553 assert(bs->copy_on_read > 0);
554 bs->copy_on_read--;
555}
556
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200557/*
Kevin Wolf57915332010-04-14 15:24:50 +0200558 * Common part for opening disk images and files
559 */
560static int bdrv_open_common(BlockDriverState *bs, const char *filename,
561 int flags, BlockDriver *drv)
562{
563 int ret, open_flags;
564
565 assert(drv != NULL);
566
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100567 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
568
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200569 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100570 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200571 bs->encrypted = 0;
572 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100573 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200574 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100575 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200576 bs->buffer_alignment = 512;
577
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000578 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
579 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
580 bdrv_enable_copy_on_read(bs);
581 }
582
Kevin Wolf57915332010-04-14 15:24:50 +0200583 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100584 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200585
586 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
587 return -ENOTSUP;
588 }
589
590 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500591 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200592
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100593 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200594
595 /*
596 * Clear flags that are internal to the block layer before opening the
597 * image.
598 */
599 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
600
601 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200602 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200603 */
604 if (bs->is_temporary) {
605 open_flags |= BDRV_O_RDWR;
606 }
607
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100608 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
609
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200610 /* Open the image, either directly or using a protocol */
611 if (drv->bdrv_file_open) {
612 ret = drv->bdrv_file_open(bs, filename, open_flags);
613 } else {
614 ret = bdrv_file_open(&bs->file, filename, open_flags);
615 if (ret >= 0) {
616 ret = drv->bdrv_open(bs, open_flags);
617 }
618 }
619
Kevin Wolf57915332010-04-14 15:24:50 +0200620 if (ret < 0) {
621 goto free_and_fail;
622 }
623
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100624 ret = refresh_total_sectors(bs, bs->total_sectors);
625 if (ret < 0) {
626 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200627 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100628
Kevin Wolf57915332010-04-14 15:24:50 +0200629#ifndef _WIN32
630 if (bs->is_temporary) {
631 unlink(filename);
632 }
633#endif
634 return 0;
635
636free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200637 if (bs->file) {
638 bdrv_delete(bs->file);
639 bs->file = NULL;
640 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500641 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200642 bs->opaque = NULL;
643 bs->drv = NULL;
644 return ret;
645}
646
647/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200648 * Opens a file using a protocol (file, host_device, nbd, ...)
649 */
bellard83f64092006-08-01 16:21:11 +0000650int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000651{
bellard83f64092006-08-01 16:21:11 +0000652 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200653 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000654 int ret;
655
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900656 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200657 if (!drv) {
658 return -ENOENT;
659 }
660
bellard83f64092006-08-01 16:21:11 +0000661 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200662 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000663 if (ret < 0) {
664 bdrv_delete(bs);
665 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000666 }
aliguori71d07702009-03-03 17:37:16 +0000667 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000668 *pbs = bs;
669 return 0;
bellardea2384d2004-08-01 21:59:26 +0000670}
bellardfc01f7e2003-06-30 10:03:06 +0000671
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200672/*
673 * Opens a disk image (raw, qcow2, vmdk, ...)
674 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200675int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
676 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000677{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200678 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200679 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000680
bellard83f64092006-08-01 16:21:11 +0000681 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000682 BlockDriverState *bs1;
683 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000684 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200685 BlockDriver *bdrv_qcow2;
686 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200687 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000688
bellardea2384d2004-08-01 21:59:26 +0000689 /* if snapshot, we create a temporary backing file and open it
690 instead of opening 'filename' directly */
691
692 /* if there is a backing file, use it */
693 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200694 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000695 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000696 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000697 return ret;
bellardea2384d2004-08-01 21:59:26 +0000698 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200699 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000700
701 if (bs1->drv && bs1->drv->protocol_name)
702 is_protocol = 1;
703
bellardea2384d2004-08-01 21:59:26 +0000704 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000705
bellardea2384d2004-08-01 21:59:26 +0000706 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000707
708 /* Real path is meaningless for protocols */
709 if (is_protocol)
710 snprintf(backing_filename, sizeof(backing_filename),
711 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000712 else if (!realpath(filename, backing_filename))
713 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000714
Kevin Wolf91a073a2009-05-27 14:48:06 +0200715 bdrv_qcow2 = bdrv_find_format("qcow2");
716 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
717
Jes Sorensen3e829902010-05-27 16:20:30 +0200718 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200719 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
720 if (drv) {
721 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
722 drv->format_name);
723 }
724
725 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200726 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000727 if (ret < 0) {
728 return ret;
bellardea2384d2004-08-01 21:59:26 +0000729 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200730
bellardea2384d2004-08-01 21:59:26 +0000731 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200732 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000733 bs->is_temporary = 1;
734 }
bellard712e7872005-04-28 21:09:32 +0000735
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200736 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200737 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200738 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000739 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100740
aliguori51d7c002009-03-05 23:00:29 +0000741 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000742 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000743 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200744
745 /* Open the image */
746 ret = bdrv_open_common(bs, filename, flags, drv);
747 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100748 goto unlink_and_fail;
749 }
750
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751 /* If there is a backing file, use it */
752 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
753 char backing_filename[PATH_MAX];
754 int back_flags;
755 BlockDriver *back_drv = NULL;
756
757 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000758
759 if (path_has_protocol(bs->backing_file)) {
760 pstrcpy(backing_filename, sizeof(backing_filename),
761 bs->backing_file);
762 } else {
763 path_combine(backing_filename, sizeof(backing_filename),
764 filename, bs->backing_file);
765 }
766
767 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200768 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000769 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200770
771 /* backing files always opened read-only */
772 back_flags =
773 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
774
775 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
776 if (ret < 0) {
777 bdrv_close(bs);
778 return ret;
779 }
780 if (bs->is_temporary) {
781 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
782 } else {
783 /* base image inherits from "parent" */
784 bs->backing_hd->keep_read_only = bs->keep_read_only;
785 }
786 }
787
788 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200789 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200790 }
791
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800792 /* throttling disk I/O limits */
793 if (bs->io_limits_enabled) {
794 bdrv_io_limits_enable(bs);
795 }
796
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200797 return 0;
798
799unlink_and_fail:
800 if (bs->is_temporary) {
801 unlink(filename);
802 }
803 return ret;
804}
805
bellardfc01f7e2003-06-30 10:03:06 +0000806void bdrv_close(BlockDriverState *bs)
807{
bellard19cb3732006-08-19 11:45:59 +0000808 if (bs->drv) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200809 if (bs == bs_snapshots) {
810 bs_snapshots = NULL;
811 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100812 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000813 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100814 bs->backing_hd = NULL;
815 }
bellardea2384d2004-08-01 21:59:26 +0000816 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500817 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000818#ifdef _WIN32
819 if (bs->is_temporary) {
820 unlink(bs->filename);
821 }
bellard67b915a2004-03-31 23:37:16 +0000822#endif
bellardea2384d2004-08-01 21:59:26 +0000823 bs->opaque = NULL;
824 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000825 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000826
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200827 if (bs->file != NULL) {
828 bdrv_close(bs->file);
829 }
830
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200831 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000832 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800833
834 /*throttling disk I/O limits*/
835 if (bs->io_limits_enabled) {
836 bdrv_io_limits_disable(bs);
837 }
bellardb3380822004-03-14 21:38:54 +0000838}
839
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900840void bdrv_close_all(void)
841{
842 BlockDriverState *bs;
843
844 QTAILQ_FOREACH(bs, &bdrv_states, list) {
845 bdrv_close(bs);
846 }
847}
848
Ryan Harperd22b2f42011-03-29 20:51:47 -0500849/* make a BlockDriverState anonymous by removing from bdrv_state list.
850 Also, NULL terminate the device_name to prevent double remove */
851void bdrv_make_anon(BlockDriverState *bs)
852{
853 if (bs->device_name[0] != '\0') {
854 QTAILQ_REMOVE(&bdrv_states, bs, list);
855 }
856 bs->device_name[0] = '\0';
857}
858
bellardb3380822004-03-14 21:38:54 +0000859void bdrv_delete(BlockDriverState *bs)
860{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200861 assert(!bs->dev);
Markus Armbruster18846de2010-06-29 16:58:30 +0200862
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100863 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500864 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000865
bellardb3380822004-03-14 21:38:54 +0000866 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200867 if (bs->file != NULL) {
868 bdrv_delete(bs->file);
869 }
870
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200871 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500872 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000873}
874
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200875int bdrv_attach_dev(BlockDriverState *bs, void *dev)
876/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200877{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200878 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +0200879 return -EBUSY;
880 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200881 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -0300882 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +0200883 return 0;
884}
885
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200886/* TODO qdevified devices don't use this, remove when devices are qdevified */
887void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +0200888{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200889 if (bdrv_attach_dev(bs, dev) < 0) {
890 abort();
891 }
892}
893
894void bdrv_detach_dev(BlockDriverState *bs, void *dev)
895/* TODO change to DeviceState *dev when all users are qdevified */
896{
897 assert(bs->dev == dev);
898 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +0200899 bs->dev_ops = NULL;
900 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +0200901 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +0200902}
903
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200904/* TODO change to return DeviceState * when all users are qdevified */
905void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +0200906{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200907 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +0200908}
909
Markus Armbruster0e49de52011-08-03 15:07:41 +0200910void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
911 void *opaque)
912{
913 bs->dev_ops = ops;
914 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200915 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
916 bs_snapshots = NULL;
917 }
Markus Armbruster0e49de52011-08-03 15:07:41 +0200918}
919
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200920static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +0200921{
Markus Armbruster145feb12011-08-03 15:07:42 +0200922 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200923 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Markus Armbruster145feb12011-08-03 15:07:42 +0200924 }
925}
926
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200927bool bdrv_dev_has_removable_media(BlockDriverState *bs)
928{
929 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
930}
931
Paolo Bonzini025ccaa2011-11-07 17:50:13 +0100932void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
933{
934 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
935 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
936 }
937}
938
Markus Armbrustere4def802011-09-06 18:58:53 +0200939bool bdrv_dev_is_tray_open(BlockDriverState *bs)
940{
941 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
942 return bs->dev_ops->is_tray_open(bs->dev_opaque);
943 }
944 return false;
945}
946
Markus Armbruster145feb12011-08-03 15:07:42 +0200947static void bdrv_dev_resize_cb(BlockDriverState *bs)
948{
949 if (bs->dev_ops && bs->dev_ops->resize_cb) {
950 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +0200951 }
952}
953
Markus Armbrusterf1076392011-09-06 18:58:46 +0200954bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
955{
956 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
957 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
958 }
959 return false;
960}
961
aliguorie97fc192009-04-21 23:11:50 +0000962/*
963 * Run consistency checks on an image
964 *
Kevin Wolfe076f332010-06-29 11:43:13 +0200965 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +0200966 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +0200967 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +0000968 */
Kevin Wolfe076f332010-06-29 11:43:13 +0200969int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +0000970{
971 if (bs->drv->bdrv_check == NULL) {
972 return -ENOTSUP;
973 }
974
Kevin Wolfe076f332010-06-29 11:43:13 +0200975 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +0200976 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +0000977}
978
Kevin Wolf8a426612010-07-16 17:17:01 +0200979#define COMMIT_BUF_SECTORS 2048
980
bellard33e39632003-07-06 17:15:21 +0000981/* commit COW file into the raw image */
982int bdrv_commit(BlockDriverState *bs)
983{
bellard19cb3732006-08-19 11:45:59 +0000984 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +0200985 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +0200986 int64_t sector, total_sectors;
987 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200988 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +0200989 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200990 char filename[1024];
991 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +0000992
bellard19cb3732006-08-19 11:45:59 +0000993 if (!drv)
994 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200995
996 if (!bs->backing_hd) {
997 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +0000998 }
999
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001000 if (bs->backing_hd->keep_read_only) {
1001 return -EACCES;
1002 }
Kevin Wolfee181192010-08-05 13:05:22 +02001003
1004 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001005 ro = bs->backing_hd->read_only;
1006 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1007 open_flags = bs->backing_hd->open_flags;
1008
1009 if (ro) {
1010 /* re-open as RW */
1011 bdrv_delete(bs->backing_hd);
1012 bs->backing_hd = NULL;
1013 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001014 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1015 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001016 if (rw_ret < 0) {
1017 bdrv_delete(bs_rw);
1018 /* try to re-open read-only */
1019 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001020 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1021 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001022 if (ret < 0) {
1023 bdrv_delete(bs_ro);
1024 /* drive not functional anymore */
1025 bs->drv = NULL;
1026 return ret;
1027 }
1028 bs->backing_hd = bs_ro;
1029 return rw_ret;
1030 }
1031 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001032 }
bellardea2384d2004-08-01 21:59:26 +00001033
Jan Kiszka6ea44302009-11-30 18:21:19 +01001034 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001035 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001036
Kevin Wolf8a426612010-07-16 17:17:01 +02001037 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001038 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001039
1040 if (bdrv_read(bs, sector, buf, n) != 0) {
1041 ret = -EIO;
1042 goto ro_cleanup;
1043 }
1044
1045 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1046 ret = -EIO;
1047 goto ro_cleanup;
1048 }
bellardea2384d2004-08-01 21:59:26 +00001049 }
1050 }
bellard95389c82005-12-18 18:28:15 +00001051
Christoph Hellwig1d449522010-01-17 12:32:30 +01001052 if (drv->bdrv_make_empty) {
1053 ret = drv->bdrv_make_empty(bs);
1054 bdrv_flush(bs);
1055 }
bellard95389c82005-12-18 18:28:15 +00001056
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001057 /*
1058 * Make sure all data we wrote to the backing device is actually
1059 * stable on disk.
1060 */
1061 if (bs->backing_hd)
1062 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001063
1064ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001065 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001066
1067 if (ro) {
1068 /* re-open as RO */
1069 bdrv_delete(bs->backing_hd);
1070 bs->backing_hd = NULL;
1071 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001072 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1073 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001074 if (ret < 0) {
1075 bdrv_delete(bs_ro);
1076 /* drive not functional anymore */
1077 bs->drv = NULL;
1078 return ret;
1079 }
1080 bs->backing_hd = bs_ro;
1081 bs->backing_hd->keep_read_only = 0;
1082 }
1083
Christoph Hellwig1d449522010-01-17 12:32:30 +01001084 return ret;
bellard33e39632003-07-06 17:15:21 +00001085}
1086
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001087void bdrv_commit_all(void)
1088{
1089 BlockDriverState *bs;
1090
1091 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1092 bdrv_commit(bs);
1093 }
1094}
1095
/*
 * One in-flight request on a BlockDriverState. Filled in by
 * tracked_request_begin() and linked into bs->tracked_requests until
 * tracked_request_end() removes it.
 */
struct BdrvTrackedRequest {
    BlockDriverState *bs;       /* device the request was issued on */
    int64_t sector_num;         /* first sector of the request */
    int nb_sectors;             /* number of sectors in the request */
    bool is_write;              /* true for writes, false for reads */
    QLIST_ENTRY(BdrvTrackedRequest) list; /* link in bs->tracked_requests */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1104
1105/**
1106 * Remove an active request from the tracked requests list
1107 *
1108 * This function should be called when a tracked request is completing.
1109 */
1110static void tracked_request_end(BdrvTrackedRequest *req)
1111{
1112 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001113 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001114}
1115
1116/**
1117 * Add an active request to the tracked requests list
1118 */
1119static void tracked_request_begin(BdrvTrackedRequest *req,
1120 BlockDriverState *bs,
1121 int64_t sector_num,
1122 int nb_sectors, bool is_write)
1123{
1124 *req = (BdrvTrackedRequest){
1125 .bs = bs,
1126 .sector_num = sector_num,
1127 .nb_sectors = nb_sectors,
1128 .is_write = is_write,
1129 };
1130
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001131 qemu_co_queue_init(&req->wait_queue);
1132
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001133 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1134}
1135
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001136/**
1137 * Round a region to cluster boundaries
1138 */
1139static void round_to_clusters(BlockDriverState *bs,
1140 int64_t sector_num, int nb_sectors,
1141 int64_t *cluster_sector_num,
1142 int *cluster_nb_sectors)
1143{
1144 BlockDriverInfo bdi;
1145
1146 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1147 *cluster_sector_num = sector_num;
1148 *cluster_nb_sectors = nb_sectors;
1149 } else {
1150 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1151 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1152 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1153 nb_sectors, c);
1154 }
1155}
1156
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001157static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1158 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001159 /* aaaa bbbb */
1160 if (sector_num >= req->sector_num + req->nb_sectors) {
1161 return false;
1162 }
1163 /* bbbb aaaa */
1164 if (req->sector_num >= sector_num + nb_sectors) {
1165 return false;
1166 }
1167 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001168}
1169
1170static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1171 int64_t sector_num, int nb_sectors)
1172{
1173 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001174 int64_t cluster_sector_num;
1175 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001176 bool retry;
1177
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001178 /* If we touch the same cluster it counts as an overlap. This guarantees
1179 * that allocating writes will be serialized and not race with each other
1180 * for the same cluster. For example, in copy-on-read it ensures that the
1181 * CoR read and write operations are atomic and guest writes cannot
1182 * interleave between them.
1183 */
1184 round_to_clusters(bs, sector_num, nb_sectors,
1185 &cluster_sector_num, &cluster_nb_sectors);
1186
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001187 do {
1188 retry = false;
1189 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001190 if (tracked_request_overlaps(req, cluster_sector_num,
1191 cluster_nb_sectors)) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001192 qemu_co_queue_wait(&req->wait_queue);
1193 retry = true;
1194 break;
1195 }
1196 }
1197 } while (retry);
1198}
1199
Kevin Wolf756e6732010-01-12 12:55:17 +01001200/*
1201 * Return values:
1202 * 0 - success
1203 * -EINVAL - backing format specified, but no file
1204 * -ENOSPC - can't update the backing file because no space is left in the
1205 * image file header
1206 * -ENOTSUP - format driver doesn't support changing the backing file
1207 */
1208int bdrv_change_backing_file(BlockDriverState *bs,
1209 const char *backing_file, const char *backing_fmt)
1210{
1211 BlockDriver *drv = bs->drv;
1212
1213 if (drv->bdrv_change_backing_file != NULL) {
1214 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1215 } else {
1216 return -ENOTSUP;
1217 }
1218}
1219
aliguori71d07702009-03-03 17:37:16 +00001220static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1221 size_t size)
1222{
1223 int64_t len;
1224
1225 if (!bdrv_is_inserted(bs))
1226 return -ENOMEDIUM;
1227
1228 if (bs->growable)
1229 return 0;
1230
1231 len = bdrv_getlength(bs);
1232
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001233 if (offset < 0)
1234 return -EIO;
1235
1236 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001237 return -EIO;
1238
1239 return 0;
1240}
1241
1242static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1243 int nb_sectors)
1244{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001245 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1246 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001247}
1248
/*
 * Arguments and result slot for one synchronous read/write that
 * bdrv_rw_co() hands to bdrv_rw_co_entry().
 */
typedef struct RwCo {
    BlockDriverState *bs;   /* device to operate on */
    int64_t sector_num;     /* first sector of the request */
    int nb_sectors;         /* number of sectors */
    QEMUIOVector *qiov;     /* data buffer(s) for the request */
    bool is_write;          /* true: write, false: read */
    int ret;                /* NOT_DONE until the entry function stores the result */
} RwCo;
1257
1258static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1259{
1260 RwCo *rwco = opaque;
1261
1262 if (!rwco->is_write) {
1263 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1264 rwco->nb_sectors, rwco->qiov);
1265 } else {
1266 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1267 rwco->nb_sectors, rwco->qiov);
1268 }
1269}
1270
1271/*
1272 * Process a synchronous request using coroutines
1273 */
1274static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1275 int nb_sectors, bool is_write)
1276{
1277 QEMUIOVector qiov;
1278 struct iovec iov = {
1279 .iov_base = (void *)buf,
1280 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1281 };
1282 Coroutine *co;
1283 RwCo rwco = {
1284 .bs = bs,
1285 .sector_num = sector_num,
1286 .nb_sectors = nb_sectors,
1287 .qiov = &qiov,
1288 .is_write = is_write,
1289 .ret = NOT_DONE,
1290 };
1291
1292 qemu_iovec_init_external(&qiov, &iov, 1);
1293
1294 if (qemu_in_coroutine()) {
1295 /* Fast-path if already in coroutine context */
1296 bdrv_rw_co_entry(&rwco);
1297 } else {
1298 co = qemu_coroutine_create(bdrv_rw_co_entry);
1299 qemu_coroutine_enter(co, &rwco);
1300 while (rwco.ret == NOT_DONE) {
1301 qemu_aio_wait();
1302 }
1303 }
1304 return rwco.ret;
1305}
1306
bellard19cb3732006-08-19 11:45:59 +00001307/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001308int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001309 uint8_t *buf, int nb_sectors)
1310{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001311 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001312}
1313
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001314static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001315 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001316{
1317 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001318 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001319
Jan Kiszka6ea44302009-11-30 18:21:19 +01001320 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001321 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001322
1323 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001324 idx = start / (sizeof(unsigned long) * 8);
1325 bit = start % (sizeof(unsigned long) * 8);
1326 val = bs->dirty_bitmap[idx];
1327 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001328 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001329 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001330 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001331 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001332 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001333 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001334 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001335 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001336 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001337 }
1338 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001339 }
1340}
1341
ths5fafdf22007-09-16 21:08:06 +00001342/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001343 -EIO generic I/O error (may happen for all errors)
1344 -ENOMEDIUM No media inserted.
1345 -EINVAL Invalid sector number or nb_sectors
1346 -EACCES Trying to write a read-only device
1347*/
ths5fafdf22007-09-16 21:08:06 +00001348int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001349 const uint8_t *buf, int nb_sectors)
1350{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001351 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001352}
1353
aliguorieda578e2009-03-12 19:57:16 +00001354int bdrv_pread(BlockDriverState *bs, int64_t offset,
1355 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001356{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001357 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001358 int len, nb_sectors, count;
1359 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001360 int ret;
bellard83f64092006-08-01 16:21:11 +00001361
1362 count = count1;
1363 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001364 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001365 if (len > count)
1366 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001367 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001368 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001369 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1370 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001371 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001372 count -= len;
1373 if (count == 0)
1374 return count1;
1375 sector_num++;
1376 buf += len;
1377 }
1378
1379 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001380 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001381 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001382 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1383 return ret;
bellard83f64092006-08-01 16:21:11 +00001384 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001385 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001386 buf += len;
1387 count -= len;
1388 }
1389
1390 /* add data from the last sector */
1391 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001392 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1393 return ret;
bellard83f64092006-08-01 16:21:11 +00001394 memcpy(buf, tmp_buf, count);
1395 }
1396 return count1;
1397}
1398
aliguorieda578e2009-03-12 19:57:16 +00001399int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1400 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001401{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001402 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001403 int len, nb_sectors, count;
1404 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001405 int ret;
bellard83f64092006-08-01 16:21:11 +00001406
1407 count = count1;
1408 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001409 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001410 if (len > count)
1411 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001412 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001413 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001414 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1415 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001416 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001417 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1418 return ret;
bellard83f64092006-08-01 16:21:11 +00001419 count -= len;
1420 if (count == 0)
1421 return count1;
1422 sector_num++;
1423 buf += len;
1424 }
1425
1426 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001427 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001428 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001429 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1430 return ret;
bellard83f64092006-08-01 16:21:11 +00001431 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001432 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001433 buf += len;
1434 count -= len;
1435 }
1436
1437 /* add data from the last sector */
1438 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001439 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1440 return ret;
bellard83f64092006-08-01 16:21:11 +00001441 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001442 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1443 return ret;
bellard83f64092006-08-01 16:21:11 +00001444 }
1445 return count1;
1446}
bellard83f64092006-08-01 16:21:11 +00001447
Kevin Wolff08145f2010-06-16 16:38:15 +02001448/*
1449 * Writes to the file and ensures that no writes are reordered across this
1450 * request (acts as a barrier)
1451 *
1452 * Returns 0 on success, -errno in error cases.
1453 */
1454int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1455 const void *buf, int count)
1456{
1457 int ret;
1458
1459 ret = bdrv_pwrite(bs, offset, buf, count);
1460 if (ret < 0) {
1461 return ret;
1462 }
1463
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001464 /* No flush needed for cache modes that use O_DSYNC */
1465 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001466 bdrv_flush(bs);
1467 }
1468
1469 return 0;
1470}
1471
Stefan Hajnocziab185922011-11-17 13:40:31 +00001472static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1473 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1474{
1475 /* Perform I/O through a temporary buffer so that users who scribble over
1476 * their read buffer while the operation is in progress do not end up
1477 * modifying the image file. This is critical for zero-copy guest I/O
1478 * where anything might happen inside guest memory.
1479 */
1480 void *bounce_buffer;
1481
1482 struct iovec iov;
1483 QEMUIOVector bounce_qiov;
1484 int64_t cluster_sector_num;
1485 int cluster_nb_sectors;
1486 size_t skip_bytes;
1487 int ret;
1488
1489 /* Cover entire cluster so no additional backing file I/O is required when
1490 * allocating cluster in the image file.
1491 */
1492 round_to_clusters(bs, sector_num, nb_sectors,
1493 &cluster_sector_num, &cluster_nb_sectors);
1494
1495 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
1496 cluster_sector_num, cluster_nb_sectors);
1497
1498 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1499 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1500 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1501
1502 ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1503 &bounce_qiov);
1504 if (ret < 0) {
1505 goto err;
1506 }
1507
1508 ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
1509 &bounce_qiov);
1510 if (ret < 0) {
1511 /* It might be okay to ignore write errors for guest requests. If this
1512 * is a deliberate copy-on-read then we don't want to ignore the error.
1513 * Simply report it in all cases.
1514 */
1515 goto err;
1516 }
1517
1518 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1519 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1520 nb_sectors * BDRV_SECTOR_SIZE);
1521
1522err:
1523 qemu_vfree(bounce_buffer);
1524 return ret;
1525}
1526
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001527/*
1528 * Handle a read request in coroutine context
1529 */
1530static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1531 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001532{
1533 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001534 BdrvTrackedRequest req;
1535 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001536
Kevin Wolfda1fa912011-07-14 17:27:13 +02001537 if (!drv) {
1538 return -ENOMEDIUM;
1539 }
1540 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1541 return -EIO;
1542 }
1543
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001544 /* throttling disk read I/O */
1545 if (bs->io_limits_enabled) {
1546 bdrv_io_limits_intercept(bs, false, nb_sectors);
1547 }
1548
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001549 if (bs->copy_on_read) {
1550 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1551 }
1552
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001553 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001554
1555 if (bs->copy_on_read) {
1556 int pnum;
1557
1558 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1559 if (ret < 0) {
1560 goto out;
1561 }
1562
1563 if (!ret || pnum != nb_sectors) {
1564 ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
1565 goto out;
1566 }
1567 }
1568
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001569 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001570
1571out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001572 tracked_request_end(&req);
1573 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001574}
1575
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001576int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001577 int nb_sectors, QEMUIOVector *qiov)
1578{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001579 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001580
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001581 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1582}
1583
1584/*
1585 * Handle a write request in coroutine context
1586 */
1587static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1588 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1589{
1590 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001591 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001592 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001593
1594 if (!bs->drv) {
1595 return -ENOMEDIUM;
1596 }
1597 if (bs->read_only) {
1598 return -EACCES;
1599 }
1600 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1601 return -EIO;
1602 }
1603
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001604 /* throttling disk write I/O */
1605 if (bs->io_limits_enabled) {
1606 bdrv_io_limits_intercept(bs, true, nb_sectors);
1607 }
1608
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001609 if (bs->copy_on_read) {
1610 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1611 }
1612
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001613 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1614
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001615 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1616
Kevin Wolfda1fa912011-07-14 17:27:13 +02001617 if (bs->dirty_bitmap) {
1618 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1619 }
1620
1621 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1622 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1623 }
1624
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001625 tracked_request_end(&req);
1626
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001627 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001628}
1629
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001630int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1631 int nb_sectors, QEMUIOVector *qiov)
1632{
1633 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1634
1635 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1636}
1637
bellard83f64092006-08-01 16:21:11 +00001638/**
bellard83f64092006-08-01 16:21:11 +00001639 * Truncate file to 'offset' bytes (needed only for file protocols)
1640 */
1641int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1642{
1643 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001644 int ret;
bellard83f64092006-08-01 16:21:11 +00001645 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001646 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001647 if (!drv->bdrv_truncate)
1648 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001649 if (bs->read_only)
1650 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001651 if (bdrv_in_use(bs))
1652 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001653 ret = drv->bdrv_truncate(bs, offset);
1654 if (ret == 0) {
1655 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001656 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001657 }
1658 return ret;
bellard83f64092006-08-01 16:21:11 +00001659}
1660
1661/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001662 * Length of a allocated file in bytes. Sparse files are counted by actual
1663 * allocated space. Return < 0 if error or unknown.
1664 */
1665int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1666{
1667 BlockDriver *drv = bs->drv;
1668 if (!drv) {
1669 return -ENOMEDIUM;
1670 }
1671 if (drv->bdrv_get_allocated_file_size) {
1672 return drv->bdrv_get_allocated_file_size(bs);
1673 }
1674 if (bs->file) {
1675 return bdrv_get_allocated_file_size(bs->file);
1676 }
1677 return -ENOTSUP;
1678}
1679
1680/**
bellard83f64092006-08-01 16:21:11 +00001681 * Length of a file in bytes. Return < 0 if error or unknown.
1682 */
1683int64_t bdrv_getlength(BlockDriverState *bs)
1684{
1685 BlockDriver *drv = bs->drv;
1686 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001687 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001688
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001689 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001690 if (drv->bdrv_getlength) {
1691 return drv->bdrv_getlength(bs);
1692 }
bellard83f64092006-08-01 16:21:11 +00001693 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001694 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001695}
1696
bellard19cb3732006-08-19 11:45:59 +00001697/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001698void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001699{
bellard19cb3732006-08-19 11:45:59 +00001700 int64_t length;
1701 length = bdrv_getlength(bs);
1702 if (length < 0)
1703 length = 0;
1704 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001705 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001706 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001707}
bellardcf989512004-02-16 21:56:36 +00001708
/*
 * One entry of the MSDOS partition table, as read by guess_disk_lchs() from
 * offset 0x1be of sector 0.  The layout is packed so it can be overlaid
 * directly on the sector buffer.
 */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001721
1722/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1723static int guess_disk_lchs(BlockDriverState *bs,
1724 int *pcylinders, int *pheads, int *psectors)
1725{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001726 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001727 int ret, i, heads, sectors, cylinders;
1728 struct partition *p;
1729 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001730 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001731
1732 bdrv_get_geometry(bs, &nb_sectors);
1733
1734 ret = bdrv_read(bs, 0, buf, 1);
1735 if (ret < 0)
1736 return -1;
1737 /* test msdos magic */
1738 if (buf[510] != 0x55 || buf[511] != 0xaa)
1739 return -1;
1740 for(i = 0; i < 4; i++) {
1741 p = ((struct partition *)(buf + 0x1be)) + i;
1742 nr_sects = le32_to_cpu(p->nr_sects);
1743 if (nr_sects && p->end_head) {
1744 /* We make the assumption that the partition terminates on
1745 a cylinder boundary */
1746 heads = p->end_head + 1;
1747 sectors = p->end_sector & 63;
1748 if (sectors == 0)
1749 continue;
1750 cylinders = nb_sectors / (heads * sectors);
1751 if (cylinders < 1 || cylinders > 16383)
1752 continue;
1753 *pheads = heads;
1754 *psectors = sectors;
1755 *pcylinders = cylinders;
1756#if 0
1757 printf("guessed geometry: LCHS=%d %d %d\n",
1758 cylinders, heads, sectors);
1759#endif
1760 return 0;
1761 }
1762 }
1763 return -1;
1764}
1765
1766void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1767{
1768 int translation, lba_detected = 0;
1769 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00001770 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001771
1772 /* if a geometry hint is available, use it */
1773 bdrv_get_geometry(bs, &nb_sectors);
1774 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1775 translation = bdrv_get_translation_hint(bs);
1776 if (cylinders != 0) {
1777 *pcyls = cylinders;
1778 *pheads = heads;
1779 *psecs = secs;
1780 } else {
1781 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1782 if (heads > 16) {
1783 /* if heads > 16, it means that a BIOS LBA
1784 translation was active, so the default
1785 hardware geometry is OK */
1786 lba_detected = 1;
1787 goto default_geometry;
1788 } else {
1789 *pcyls = cylinders;
1790 *pheads = heads;
1791 *psecs = secs;
1792 /* disable any translation to be in sync with
1793 the logical geometry */
1794 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1795 bdrv_set_translation_hint(bs,
1796 BIOS_ATA_TRANSLATION_NONE);
1797 }
1798 }
1799 } else {
1800 default_geometry:
1801 /* if no geometry, use a standard physical disk geometry */
1802 cylinders = nb_sectors / (16 * 63);
1803
1804 if (cylinders > 16383)
1805 cylinders = 16383;
1806 else if (cylinders < 2)
1807 cylinders = 2;
1808 *pcyls = cylinders;
1809 *pheads = 16;
1810 *psecs = 63;
1811 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1812 if ((*pcyls * *pheads) <= 131072) {
1813 bdrv_set_translation_hint(bs,
1814 BIOS_ATA_TRANSLATION_LARGE);
1815 } else {
1816 bdrv_set_translation_hint(bs,
1817 BIOS_ATA_TRANSLATION_LBA);
1818 }
1819 }
1820 }
1821 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1822 }
1823}
1824
ths5fafdf22007-09-16 21:08:06 +00001825void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001826 int cyls, int heads, int secs)
1827{
1828 bs->cyls = cyls;
1829 bs->heads = heads;
1830 bs->secs = secs;
1831}
1832
bellard46d47672004-11-16 01:45:27 +00001833void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1834{
1835 bs->translation = translation;
1836}
1837
ths5fafdf22007-09-16 21:08:06 +00001838void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001839 int *pcyls, int *pheads, int *psecs)
1840{
1841 *pcyls = bs->cyls;
1842 *pheads = bs->heads;
1843 *psecs = bs->secs;
1844}
1845
Zhi Yong Wu0563e192011-11-03 16:57:25 +08001846/* throttling disk io limits */
1847void bdrv_set_io_limits(BlockDriverState *bs,
1848 BlockIOLimit *io_limits)
1849{
1850 bs->io_limits = *io_limits;
1851 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1852}
1853
Blue Swirl5bbdbb42011-02-12 20:43:32 +00001854/* Recognize floppy formats */
1855typedef struct FDFormat {
1856 FDriveType drive;
1857 uint8_t last_sect;
1858 uint8_t max_track;
1859 uint8_t max_head;
1860} FDFormat;
1861
1862static const FDFormat fd_formats[] = {
1863 /* First entry is default format */
1864 /* 1.44 MB 3"1/2 floppy disks */
1865 { FDRIVE_DRV_144, 18, 80, 1, },
1866 { FDRIVE_DRV_144, 20, 80, 1, },
1867 { FDRIVE_DRV_144, 21, 80, 1, },
1868 { FDRIVE_DRV_144, 21, 82, 1, },
1869 { FDRIVE_DRV_144, 21, 83, 1, },
1870 { FDRIVE_DRV_144, 22, 80, 1, },
1871 { FDRIVE_DRV_144, 23, 80, 1, },
1872 { FDRIVE_DRV_144, 24, 80, 1, },
1873 /* 2.88 MB 3"1/2 floppy disks */
1874 { FDRIVE_DRV_288, 36, 80, 1, },
1875 { FDRIVE_DRV_288, 39, 80, 1, },
1876 { FDRIVE_DRV_288, 40, 80, 1, },
1877 { FDRIVE_DRV_288, 44, 80, 1, },
1878 { FDRIVE_DRV_288, 48, 80, 1, },
1879 /* 720 kB 3"1/2 floppy disks */
1880 { FDRIVE_DRV_144, 9, 80, 1, },
1881 { FDRIVE_DRV_144, 10, 80, 1, },
1882 { FDRIVE_DRV_144, 10, 82, 1, },
1883 { FDRIVE_DRV_144, 10, 83, 1, },
1884 { FDRIVE_DRV_144, 13, 80, 1, },
1885 { FDRIVE_DRV_144, 14, 80, 1, },
1886 /* 1.2 MB 5"1/4 floppy disks */
1887 { FDRIVE_DRV_120, 15, 80, 1, },
1888 { FDRIVE_DRV_120, 18, 80, 1, },
1889 { FDRIVE_DRV_120, 18, 82, 1, },
1890 { FDRIVE_DRV_120, 18, 83, 1, },
1891 { FDRIVE_DRV_120, 20, 80, 1, },
1892 /* 720 kB 5"1/4 floppy disks */
1893 { FDRIVE_DRV_120, 9, 80, 1, },
1894 { FDRIVE_DRV_120, 11, 80, 1, },
1895 /* 360 kB 5"1/4 floppy disks */
1896 { FDRIVE_DRV_120, 9, 40, 1, },
1897 { FDRIVE_DRV_120, 9, 40, 0, },
1898 { FDRIVE_DRV_120, 10, 41, 1, },
1899 { FDRIVE_DRV_120, 10, 42, 1, },
1900 /* 320 kB 5"1/4 floppy disks */
1901 { FDRIVE_DRV_120, 8, 40, 1, },
1902 { FDRIVE_DRV_120, 8, 40, 0, },
1903 /* 360 kB must match 5"1/4 better than 3"1/2... */
1904 { FDRIVE_DRV_144, 9, 80, 0, },
1905 /* end */
1906 { FDRIVE_DRV_NONE, -1, -1, 0, },
1907};
1908
1909void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1910 int *max_track, int *last_sect,
1911 FDriveType drive_in, FDriveType *drive)
1912{
1913 const FDFormat *parse;
1914 uint64_t nb_sectors, size;
1915 int i, first_match, match;
1916
1917 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1918 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1919 /* User defined disk */
1920 } else {
1921 bdrv_get_geometry(bs, &nb_sectors);
1922 match = -1;
1923 first_match = -1;
1924 for (i = 0; ; i++) {
1925 parse = &fd_formats[i];
1926 if (parse->drive == FDRIVE_DRV_NONE) {
1927 break;
1928 }
1929 if (drive_in == parse->drive ||
1930 drive_in == FDRIVE_DRV_NONE) {
1931 size = (parse->max_head + 1) * parse->max_track *
1932 parse->last_sect;
1933 if (nb_sectors == size) {
1934 match = i;
1935 break;
1936 }
1937 if (first_match == -1) {
1938 first_match = i;
1939 }
1940 }
1941 }
1942 if (match == -1) {
1943 if (first_match == -1) {
1944 match = 1;
1945 } else {
1946 match = first_match;
1947 }
1948 parse = &fd_formats[match];
1949 }
1950 *nb_heads = parse->max_head + 1;
1951 *max_track = parse->max_track;
1952 *last_sect = parse->last_sect;
1953 *drive = parse->drive;
1954 }
1955}
1956
bellard46d47672004-11-16 01:45:27 +00001957int bdrv_get_translation_hint(BlockDriverState *bs)
1958{
1959 return bs->translation;
1960}
1961
Markus Armbrusterabd7f682010-06-02 18:55:17 +02001962void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1963 BlockErrorAction on_write_error)
1964{
1965 bs->on_read_error = on_read_error;
1966 bs->on_write_error = on_write_error;
1967}
1968
1969BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1970{
1971 return is_read ? bs->on_read_error : bs->on_write_error;
1972}
1973
bellardb3380822004-03-14 21:38:54 +00001974int bdrv_is_read_only(BlockDriverState *bs)
1975{
1976 return bs->read_only;
1977}
1978
ths985a03b2007-12-24 16:10:43 +00001979int bdrv_is_sg(BlockDriverState *bs)
1980{
1981 return bs->sg;
1982}
1983
Christoph Hellwige900a7b2009-09-04 19:01:15 +02001984int bdrv_enable_write_cache(BlockDriverState *bs)
1985{
1986 return bs->enable_write_cache;
1987}
1988
bellardea2384d2004-08-01 21:59:26 +00001989int bdrv_is_encrypted(BlockDriverState *bs)
1990{
1991 if (bs->backing_hd && bs->backing_hd->encrypted)
1992 return 1;
1993 return bs->encrypted;
1994}
1995
aliguoric0f4ce72009-03-05 23:01:01 +00001996int bdrv_key_required(BlockDriverState *bs)
1997{
1998 BlockDriverState *backing_hd = bs->backing_hd;
1999
2000 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2001 return 1;
2002 return (bs->encrypted && !bs->valid_key);
2003}
2004
bellardea2384d2004-08-01 21:59:26 +00002005int bdrv_set_key(BlockDriverState *bs, const char *key)
2006{
2007 int ret;
2008 if (bs->backing_hd && bs->backing_hd->encrypted) {
2009 ret = bdrv_set_key(bs->backing_hd, key);
2010 if (ret < 0)
2011 return ret;
2012 if (!bs->encrypted)
2013 return 0;
2014 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002015 if (!bs->encrypted) {
2016 return -EINVAL;
2017 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2018 return -ENOMEDIUM;
2019 }
aliguoric0f4ce72009-03-05 23:01:01 +00002020 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002021 if (ret < 0) {
2022 bs->valid_key = 0;
2023 } else if (!bs->valid_key) {
2024 bs->valid_key = 1;
2025 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002026 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002027 }
aliguoric0f4ce72009-03-05 23:01:01 +00002028 return ret;
bellardea2384d2004-08-01 21:59:26 +00002029}
2030
2031void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2032{
bellard19cb3732006-08-19 11:45:59 +00002033 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002034 buf[0] = '\0';
2035 } else {
2036 pstrcpy(buf, buf_size, bs->drv->format_name);
2037 }
2038}
2039
ths5fafdf22007-09-16 21:08:06 +00002040void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002041 void *opaque)
2042{
2043 BlockDriver *drv;
2044
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002045 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002046 it(opaque, drv->format_name);
2047 }
2048}
2049
bellardb3380822004-03-14 21:38:54 +00002050BlockDriverState *bdrv_find(const char *name)
2051{
2052 BlockDriverState *bs;
2053
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002054 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2055 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002056 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002057 }
bellardb3380822004-03-14 21:38:54 +00002058 }
2059 return NULL;
2060}
2061
Markus Armbruster2f399b02010-06-02 18:55:20 +02002062BlockDriverState *bdrv_next(BlockDriverState *bs)
2063{
2064 if (!bs) {
2065 return QTAILQ_FIRST(&bdrv_states);
2066 }
2067 return QTAILQ_NEXT(bs, list);
2068}
2069
aliguori51de9762009-03-05 23:00:43 +00002070void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002071{
2072 BlockDriverState *bs;
2073
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002074 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002075 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002076 }
2077}
2078
bellardea2384d2004-08-01 21:59:26 +00002079const char *bdrv_get_device_name(BlockDriverState *bs)
2080{
2081 return bs->device_name;
2082}
2083
aliguoric6ca28d2008-10-06 13:55:43 +00002084void bdrv_flush_all(void)
2085{
2086 BlockDriverState *bs;
2087
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002088 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Markus Armbrusterc602a482011-08-03 15:08:10 +02002089 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
aliguoric6ca28d2008-10-06 13:55:43 +00002090 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002091 }
2092 }
aliguoric6ca28d2008-10-06 13:55:43 +00002093}
2094
Kevin Wolff2feebb2010-04-14 17:30:35 +02002095int bdrv_has_zero_init(BlockDriverState *bs)
2096{
2097 assert(bs->drv);
2098
Kevin Wolf336c1c12010-07-28 11:26:29 +02002099 if (bs->drv->bdrv_has_zero_init) {
2100 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002101 }
2102
2103 return 1;
2104}
2105
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002106typedef struct BdrvCoIsAllocatedData {
2107 BlockDriverState *bs;
2108 int64_t sector_num;
2109 int nb_sectors;
2110 int *pnum;
2111 int ret;
2112 bool done;
2113} BdrvCoIsAllocatedData;
2114
thsf58c7b32008-06-05 21:53:49 +00002115/*
2116 * Returns true iff the specified sector is present in the disk image. Drivers
2117 * not implementing the functionality are assumed to not support backing files,
2118 * hence all their sectors are reported as allocated.
2119 *
2120 * 'pnum' is set to the number of sectors (including and immediately following
2121 * the specified sector) that are known to be in the same
2122 * allocated/unallocated state.
2123 *
2124 * 'nb_sectors' is the max value 'pnum' should be set to.
2125 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002126int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2127 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002128{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002129 if (!bs->drv->bdrv_co_is_allocated) {
2130 int64_t n;
thsf58c7b32008-06-05 21:53:49 +00002131 if (sector_num >= bs->total_sectors) {
2132 *pnum = 0;
2133 return 0;
2134 }
2135 n = bs->total_sectors - sector_num;
2136 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
2137 return 1;
2138 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002139
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002140 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2141}
2142
2143/* Coroutine wrapper for bdrv_is_allocated() */
2144static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2145{
2146 BdrvCoIsAllocatedData *data = opaque;
2147 BlockDriverState *bs = data->bs;
2148
2149 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2150 data->pnum);
2151 data->done = true;
2152}
2153
2154/*
2155 * Synchronous wrapper around bdrv_co_is_allocated().
2156 *
2157 * See bdrv_co_is_allocated() for details.
2158 */
2159int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2160 int *pnum)
2161{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002162 Coroutine *co;
2163 BdrvCoIsAllocatedData data = {
2164 .bs = bs,
2165 .sector_num = sector_num,
2166 .nb_sectors = nb_sectors,
2167 .pnum = pnum,
2168 .done = false,
2169 };
2170
2171 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2172 qemu_coroutine_enter(co, &data);
2173 while (!data.done) {
2174 qemu_aio_wait();
2175 }
2176 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002177}
2178
Luiz Capitulino2582bfe2010-02-03 12:41:01 -02002179void bdrv_mon_event(const BlockDriverState *bdrv,
2180 BlockMonEventAction action, int is_read)
2181{
2182 QObject *data;
2183 const char *action_str;
2184
2185 switch (action) {
2186 case BDRV_ACTION_REPORT:
2187 action_str = "report";
2188 break;
2189 case BDRV_ACTION_IGNORE:
2190 action_str = "ignore";
2191 break;
2192 case BDRV_ACTION_STOP:
2193 action_str = "stop";
2194 break;
2195 default:
2196 abort();
2197 }
2198
2199 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2200 bdrv->device_name,
2201 action_str,
2202 is_read ? "read" : "write");
2203 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2204
2205 qobject_decref(data);
2206}
2207
Luiz Capitulinob2023812011-09-21 17:16:47 -03002208BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002209{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002210 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002211 BlockDriverState *bs;
2212
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002213 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002214 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002215
Luiz Capitulinob2023812011-09-21 17:16:47 -03002216 info->value = g_malloc0(sizeof(*info->value));
2217 info->value->device = g_strdup(bs->device_name);
2218 info->value->type = g_strdup("unknown");
2219 info->value->locked = bdrv_dev_is_medium_locked(bs);
2220 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002221
Markus Armbrustere4def802011-09-06 18:58:53 +02002222 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002223 info->value->has_tray_open = true;
2224 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002225 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002226
2227 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002228 info->value->has_io_status = true;
2229 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002230 }
2231
bellard19cb3732006-08-19 11:45:59 +00002232 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002233 info->value->has_inserted = true;
2234 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2235 info->value->inserted->file = g_strdup(bs->filename);
2236 info->value->inserted->ro = bs->read_only;
2237 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2238 info->value->inserted->encrypted = bs->encrypted;
2239 if (bs->backing_file[0]) {
2240 info->value->inserted->has_backing_file = true;
2241 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002242 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002243
2244 if (bs->io_limits_enabled) {
2245 info->value->inserted->bps =
2246 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2247 info->value->inserted->bps_rd =
2248 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2249 info->value->inserted->bps_wr =
2250 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2251 info->value->inserted->iops =
2252 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2253 info->value->inserted->iops_rd =
2254 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2255 info->value->inserted->iops_wr =
2256 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2257 }
bellardb3380822004-03-14 21:38:54 +00002258 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002259
2260 /* XXX: waiting for the qapi to support GSList */
2261 if (!cur_item) {
2262 head = cur_item = info;
2263 } else {
2264 cur_item->next = info;
2265 cur_item = info;
2266 }
bellardb3380822004-03-14 21:38:54 +00002267 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002268
Luiz Capitulinob2023812011-09-21 17:16:47 -03002269 return head;
bellardb3380822004-03-14 21:38:54 +00002270}
thsa36e69d2007-12-02 05:18:19 +00002271
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002272/* Consider exposing this as a full fledged QMP command */
2273static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002274{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002275 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002276
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002277 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002278
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002279 if (bs->device_name[0]) {
2280 s->has_device = true;
2281 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002282 }
2283
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002284 s->stats = g_malloc0(sizeof(*s->stats));
2285 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2286 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2287 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2288 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2289 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2290 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2291 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2292 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2293 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2294
Kevin Wolf294cc352010-04-28 14:34:01 +02002295 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002296 s->has_parent = true;
2297 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002298 }
2299
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002300 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002301}
2302
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002303BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002304{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002305 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002306 BlockDriverState *bs;
2307
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002308 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002309 BlockStatsList *info = g_malloc0(sizeof(*info));
2310 info->value = qmp_query_blockstat(bs, NULL);
2311
2312 /* XXX: waiting for the qapi to support GSList */
2313 if (!cur_item) {
2314 head = cur_item = info;
2315 } else {
2316 cur_item->next = info;
2317 cur_item = info;
2318 }
thsa36e69d2007-12-02 05:18:19 +00002319 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002320
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002321 return head;
thsa36e69d2007-12-02 05:18:19 +00002322}
bellardea2384d2004-08-01 21:59:26 +00002323
aliguori045df332009-03-05 23:00:48 +00002324const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2325{
2326 if (bs->backing_hd && bs->backing_hd->encrypted)
2327 return bs->backing_file;
2328 else if (bs->encrypted)
2329 return bs->filename;
2330 else
2331 return NULL;
2332}
2333
ths5fafdf22007-09-16 21:08:06 +00002334void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002335 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002336{
Kevin Wolf3574c602011-10-26 11:02:11 +02002337 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002338}
2339
ths5fafdf22007-09-16 21:08:06 +00002340int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002341 const uint8_t *buf, int nb_sectors)
2342{
2343 BlockDriver *drv = bs->drv;
2344 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002345 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002346 if (!drv->bdrv_write_compressed)
2347 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002348 if (bdrv_check_request(bs, sector_num, nb_sectors))
2349 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002350
Jan Kiszkac6d22832009-11-30 18:21:20 +01002351 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002352 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2353 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002354
bellardfaea38e2006-08-05 21:31:00 +00002355 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2356}
ths3b46e622007-09-17 08:09:54 +00002357
bellardfaea38e2006-08-05 21:31:00 +00002358int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2359{
2360 BlockDriver *drv = bs->drv;
2361 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002362 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002363 if (!drv->bdrv_get_info)
2364 return -ENOTSUP;
2365 memset(bdi, 0, sizeof(*bdi));
2366 return drv->bdrv_get_info(bs, bdi);
2367}
2368
Christoph Hellwig45566e92009-07-10 23:11:57 +02002369int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2370 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002371{
2372 BlockDriver *drv = bs->drv;
2373 if (!drv)
2374 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002375 if (drv->bdrv_save_vmstate)
2376 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2377 if (bs->file)
2378 return bdrv_save_vmstate(bs->file, buf, pos, size);
2379 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002380}
2381
Christoph Hellwig45566e92009-07-10 23:11:57 +02002382int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2383 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002384{
2385 BlockDriver *drv = bs->drv;
2386 if (!drv)
2387 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002388 if (drv->bdrv_load_vmstate)
2389 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2390 if (bs->file)
2391 return bdrv_load_vmstate(bs->file, buf, pos, size);
2392 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002393}
2394
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002395void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2396{
2397 BlockDriver *drv = bs->drv;
2398
2399 if (!drv || !drv->bdrv_debug_event) {
2400 return;
2401 }
2402
2403 return drv->bdrv_debug_event(bs, event);
2404
2405}
2406
bellardfaea38e2006-08-05 21:31:00 +00002407/**************************************************************/
2408/* handling of snapshots */
2409
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002410int bdrv_can_snapshot(BlockDriverState *bs)
2411{
2412 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002413 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002414 return 0;
2415 }
2416
2417 if (!drv->bdrv_snapshot_create) {
2418 if (bs->file != NULL) {
2419 return bdrv_can_snapshot(bs->file);
2420 }
2421 return 0;
2422 }
2423
2424 return 1;
2425}
2426
Blue Swirl199630b2010-07-25 20:49:34 +00002427int bdrv_is_snapshot(BlockDriverState *bs)
2428{
2429 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2430}
2431
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002432BlockDriverState *bdrv_snapshots(void)
2433{
2434 BlockDriverState *bs;
2435
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002436 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002437 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002438 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002439
2440 bs = NULL;
2441 while ((bs = bdrv_next(bs))) {
2442 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002443 bs_snapshots = bs;
2444 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002445 }
2446 }
2447 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002448}
2449
ths5fafdf22007-09-16 21:08:06 +00002450int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002451 QEMUSnapshotInfo *sn_info)
2452{
2453 BlockDriver *drv = bs->drv;
2454 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002455 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002456 if (drv->bdrv_snapshot_create)
2457 return drv->bdrv_snapshot_create(bs, sn_info);
2458 if (bs->file)
2459 return bdrv_snapshot_create(bs->file, sn_info);
2460 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002461}
2462
ths5fafdf22007-09-16 21:08:06 +00002463int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002464 const char *snapshot_id)
2465{
2466 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002467 int ret, open_ret;
2468
bellardfaea38e2006-08-05 21:31:00 +00002469 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002470 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002471 if (drv->bdrv_snapshot_goto)
2472 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2473
2474 if (bs->file) {
2475 drv->bdrv_close(bs);
2476 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2477 open_ret = drv->bdrv_open(bs, bs->open_flags);
2478 if (open_ret < 0) {
2479 bdrv_delete(bs->file);
2480 bs->drv = NULL;
2481 return open_ret;
2482 }
2483 return ret;
2484 }
2485
2486 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002487}
2488
2489int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2490{
2491 BlockDriver *drv = bs->drv;
2492 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002493 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002494 if (drv->bdrv_snapshot_delete)
2495 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2496 if (bs->file)
2497 return bdrv_snapshot_delete(bs->file, snapshot_id);
2498 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002499}
2500
ths5fafdf22007-09-16 21:08:06 +00002501int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002502 QEMUSnapshotInfo **psn_info)
2503{
2504 BlockDriver *drv = bs->drv;
2505 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002506 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002507 if (drv->bdrv_snapshot_list)
2508 return drv->bdrv_snapshot_list(bs, psn_info);
2509 if (bs->file)
2510 return bdrv_snapshot_list(bs->file, psn_info);
2511 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002512}
2513
edison51ef6722010-09-21 19:58:41 -07002514int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2515 const char *snapshot_name)
2516{
2517 BlockDriver *drv = bs->drv;
2518 if (!drv) {
2519 return -ENOMEDIUM;
2520 }
2521 if (!bs->read_only) {
2522 return -EINVAL;
2523 }
2524 if (drv->bdrv_snapshot_load_tmp) {
2525 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2526 }
2527 return -ENOTSUP;
2528}
2529
#define NB_SUFFIXES 4

/*
 * Format size into buf (at most buf_size bytes) in a short readable form
 * and return buf.
 *
 * Values up to 999 (including negative values) are printed as plain
 * integers.  Larger values are scaled by powers of 1024 and suffixed with
 * K, M, G or T: one decimal is printed while the scaled value is below 10,
 * otherwise the value is rounded to the nearest integer.  Values too large
 * for the T range are still printed with the T suffix.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Advance to the first unit with size < 1000 * unit, stopping at T. */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 suffixes[idx]);
    } else {
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit,
                 suffixes[idx]);
    }
    return buf;
}
2559
2560char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2561{
2562 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002563#ifdef _WIN32
2564 struct tm *ptm;
2565#else
bellardfaea38e2006-08-05 21:31:00 +00002566 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002567#endif
bellardfaea38e2006-08-05 21:31:00 +00002568 time_t ti;
2569 int64_t secs;
2570
2571 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002572 snprintf(buf, buf_size,
2573 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002574 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2575 } else {
2576 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002577#ifdef _WIN32
2578 ptm = localtime(&ti);
2579 strftime(date_buf, sizeof(date_buf),
2580 "%Y-%m-%d %H:%M:%S", ptm);
2581#else
bellardfaea38e2006-08-05 21:31:00 +00002582 localtime_r(&ti, &tm);
2583 strftime(date_buf, sizeof(date_buf),
2584 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002585#endif
bellardfaea38e2006-08-05 21:31:00 +00002586 secs = sn->vm_clock_nsec / 1000000000;
2587 snprintf(clock_buf, sizeof(clock_buf),
2588 "%02d:%02d:%02d.%03d",
2589 (int)(secs / 3600),
2590 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002591 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002592 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2593 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002594 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002595 sn->id_str, sn->name,
2596 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2597 date_buf,
2598 clock_buf);
2599 }
2600 return buf;
2601}
2602
/**************************************************************/
/* async I/Os */
2605
aliguori3b69e4b2009-01-22 16:59:24 +00002606BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002607 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002608 BlockDriverCompletionFunc *cb, void *opaque)
2609{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002610 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2611
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002612 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002613 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002614}
2615
aliguorif141eaf2009-04-07 18:43:24 +00002616BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2617 QEMUIOVector *qiov, int nb_sectors,
2618 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002619{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002620 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2621
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002622 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002623 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002624}
2625
Kevin Wolf40b4f532009-09-09 17:53:37 +02002626
2627typedef struct MultiwriteCB {
2628 int error;
2629 int num_requests;
2630 int num_callbacks;
2631 struct {
2632 BlockDriverCompletionFunc *cb;
2633 void *opaque;
2634 QEMUIOVector *free_qiov;
2635 void *free_buf;
2636 } callbacks[];
2637} MultiwriteCB;
2638
2639static void multiwrite_user_cb(MultiwriteCB *mcb)
2640{
2641 int i;
2642
2643 for (i = 0; i < mcb->num_callbacks; i++) {
2644 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002645 if (mcb->callbacks[i].free_qiov) {
2646 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2647 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002648 g_free(mcb->callbacks[i].free_qiov);
Herve Poussineauf8a83242010-01-24 21:23:56 +00002649 qemu_vfree(mcb->callbacks[i].free_buf);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002650 }
2651}
2652
2653static void multiwrite_cb(void *opaque, int ret)
2654{
2655 MultiwriteCB *mcb = opaque;
2656
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002657 trace_multiwrite_cb(mcb, ret);
2658
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002659 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002660 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002661 }
2662
2663 mcb->num_requests--;
2664 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002665 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002666 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002667 }
2668}
2669
2670static int multiwrite_req_compare(const void *a, const void *b)
2671{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002672 const BlockRequest *req1 = a, *req2 = b;
2673
2674 /*
2675 * Note that we can't simply subtract req2->sector from req1->sector
2676 * here as that could overflow the return value.
2677 */
2678 if (req1->sector > req2->sector) {
2679 return 1;
2680 } else if (req1->sector < req2->sector) {
2681 return -1;
2682 } else {
2683 return 0;
2684 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002685}
2686
2687/*
2688 * Takes a bunch of requests and tries to merge them. Returns the number of
2689 * requests that remain after merging.
2690 */
2691static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2692 int num_reqs, MultiwriteCB *mcb)
2693{
2694 int i, outidx;
2695
2696 // Sort requests by start sector
2697 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2698
2699 // Check if adjacent requests touch the same clusters. If so, combine them,
2700 // filling up gaps with zero sectors.
2701 outidx = 0;
2702 for (i = 1; i < num_reqs; i++) {
2703 int merge = 0;
2704 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2705
2706 // This handles the cases that are valid for all block drivers, namely
2707 // exactly sequential writes and overlapping writes.
2708 if (reqs[i].sector <= oldreq_last) {
2709 merge = 1;
2710 }
2711
2712 // The block driver may decide that it makes sense to combine requests
2713 // even if there is a gap of some sectors between them. In this case,
2714 // the gap is filled with zeros (therefore only applicable for yet
2715 // unused space in format like qcow2).
2716 if (!merge && bs->drv->bdrv_merge_requests) {
2717 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2718 }
2719
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002720 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2721 merge = 0;
2722 }
2723
Kevin Wolf40b4f532009-09-09 17:53:37 +02002724 if (merge) {
2725 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002726 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002727 qemu_iovec_init(qiov,
2728 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2729
2730 // Add the first request to the merged one. If the requests are
2731 // overlapping, drop the last sectors of the first request.
2732 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2733 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2734
2735 // We might need to add some zeros between the two requests
2736 if (reqs[i].sector > oldreq_last) {
2737 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2738 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2739 memset(buf, 0, zero_bytes);
2740 qemu_iovec_add(qiov, buf, zero_bytes);
2741 mcb->callbacks[i].free_buf = buf;
2742 }
2743
2744 // Add the second request
2745 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2746
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002747 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002748 reqs[outidx].qiov = qiov;
2749
2750 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2751 } else {
2752 outidx++;
2753 reqs[outidx].sector = reqs[i].sector;
2754 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2755 reqs[outidx].qiov = reqs[i].qiov;
2756 }
2757 }
2758
2759 return outidx + 1;
2760}
2761
2762/*
2763 * Submit multiple AIO write requests at once.
2764 *
2765 * On success, the function returns 0 and all requests in the reqs array have
2766 * been submitted. In error case this function returns -1, and any of the
2767 * requests may or may not be submitted yet. In particular, this means that the
2768 * callback will be called for some of the requests, for others it won't. The
2769 * caller must check the error field of the BlockRequest to wait for the right
2770 * callbacks (if error != 0, no callback will be called).
2771 *
2772 * The implementation may modify the contents of the reqs array, e.g. to merge
2773 * requests. However, the fields opaque and error are left unmodified as they
2774 * are used to signal failure for a single request to the caller.
2775 */
2776int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2777{
2778 BlockDriverAIOCB *acb;
2779 MultiwriteCB *mcb;
2780 int i;
2781
Ryan Harper301db7c2011-03-07 10:01:04 -06002782 /* don't submit writes if we don't have a medium */
2783 if (bs->drv == NULL) {
2784 for (i = 0; i < num_reqs; i++) {
2785 reqs[i].error = -ENOMEDIUM;
2786 }
2787 return -1;
2788 }
2789
Kevin Wolf40b4f532009-09-09 17:53:37 +02002790 if (num_reqs == 0) {
2791 return 0;
2792 }
2793
2794 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05002795 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002796 mcb->num_requests = 0;
2797 mcb->num_callbacks = num_reqs;
2798
2799 for (i = 0; i < num_reqs; i++) {
2800 mcb->callbacks[i].cb = reqs[i].cb;
2801 mcb->callbacks[i].opaque = reqs[i].opaque;
2802 }
2803
2804 // Check for mergable requests
2805 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2806
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002807 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2808
Kevin Wolf453f9a12010-07-02 14:01:21 +02002809 /*
2810 * Run the aio requests. As soon as one request can't be submitted
2811 * successfully, fail all requests that are not yet submitted (we must
2812 * return failure for all requests anyway)
2813 *
2814 * num_requests cannot be set to the right value immediately: If
2815 * bdrv_aio_writev fails for some request, num_requests would be too high
2816 * and therefore multiwrite_cb() would never recognize the multiwrite
2817 * request as completed. We also cannot use the loop variable i to set it
2818 * when the first request fails because the callback may already have been
2819 * called for previously submitted requests. Thus, num_requests must be
2820 * incremented for each request that is submitted.
2821 *
2822 * The problem that callbacks may be called early also means that we need
2823 * to take care that num_requests doesn't become 0 before all requests are
2824 * submitted - multiwrite_cb() would consider the multiwrite request
2825 * completed. A dummy request that is "completed" by a manual call to
2826 * multiwrite_cb() takes care of this.
2827 */
2828 mcb->num_requests = 1;
2829
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002830 // Run the aio requests
Kevin Wolf40b4f532009-09-09 17:53:37 +02002831 for (i = 0; i < num_reqs; i++) {
Kevin Wolf453f9a12010-07-02 14:01:21 +02002832 mcb->num_requests++;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002833 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2834 reqs[i].nb_sectors, multiwrite_cb, mcb);
2835
2836 if (acb == NULL) {
2837 // We can only fail the whole thing if no request has been
2838 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2839 // complete and report the error in the callback.
Kevin Wolf453f9a12010-07-02 14:01:21 +02002840 if (i == 0) {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002841 trace_bdrv_aio_multiwrite_earlyfail(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002842 goto fail;
2843 } else {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002844 trace_bdrv_aio_multiwrite_latefail(mcb, i);
Kevin Wolf7eb58a62010-04-06 18:24:07 +02002845 multiwrite_cb(mcb, -EIO);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002846 break;
2847 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002848 }
2849 }
2850
Kevin Wolf453f9a12010-07-02 14:01:21 +02002851 /* Complete the dummy request */
2852 multiwrite_cb(mcb, 0);
2853
Kevin Wolf40b4f532009-09-09 17:53:37 +02002854 return 0;
2855
2856fail:
Kevin Wolf453f9a12010-07-02 14:01:21 +02002857 for (i = 0; i < mcb->num_callbacks; i++) {
2858 reqs[i].error = -EIO;
2859 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002860 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002861 return -1;
2862}
2863
bellard83f64092006-08-01 16:21:11 +00002864void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00002865{
aliguori6bbff9a2009-03-20 18:25:59 +00002866 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00002867}
2868
/* block I/O throttling */
2870static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2871 bool is_write, double elapsed_time, uint64_t *wait)
2872{
2873 uint64_t bps_limit = 0;
2874 double bytes_limit, bytes_base, bytes_res;
2875 double slice_time, wait_time;
2876
2877 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2878 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2879 } else if (bs->io_limits.bps[is_write]) {
2880 bps_limit = bs->io_limits.bps[is_write];
2881 } else {
2882 if (wait) {
2883 *wait = 0;
2884 }
2885
2886 return false;
2887 }
2888
2889 slice_time = bs->slice_end - bs->slice_start;
2890 slice_time /= (NANOSECONDS_PER_SECOND);
2891 bytes_limit = bps_limit * slice_time;
2892 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2893 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2894 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2895 }
2896
2897 /* bytes_base: the bytes of data which have been read/written; and
2898 * it is obtained from the history statistic info.
2899 * bytes_res: the remaining bytes of data which need to be read/written.
2900 * (bytes_base + bytes_res) / bps_limit: used to calcuate
2901 * the total time for completing reading/writting all data.
2902 */
2903 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2904
2905 if (bytes_base + bytes_res <= bytes_limit) {
2906 if (wait) {
2907 *wait = 0;
2908 }
2909
2910 return false;
2911 }
2912
2913 /* Calc approx time to dispatch */
2914 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2915
2916 /* When the I/O rate at runtime exceeds the limits,
2917 * bs->slice_end need to be extended in order that the current statistic
2918 * info can be kept until the timer fire, so it is increased and tuned
2919 * based on the result of experiment.
2920 */
2921 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2922 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2923 if (wait) {
2924 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2925 }
2926
2927 return true;
2928}
2929
2930static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2931 double elapsed_time, uint64_t *wait)
2932{
2933 uint64_t iops_limit = 0;
2934 double ios_limit, ios_base;
2935 double slice_time, wait_time;
2936
2937 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2938 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2939 } else if (bs->io_limits.iops[is_write]) {
2940 iops_limit = bs->io_limits.iops[is_write];
2941 } else {
2942 if (wait) {
2943 *wait = 0;
2944 }
2945
2946 return false;
2947 }
2948
2949 slice_time = bs->slice_end - bs->slice_start;
2950 slice_time /= (NANOSECONDS_PER_SECOND);
2951 ios_limit = iops_limit * slice_time;
2952 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2953 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2954 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2955 }
2956
2957 if (ios_base + 1 <= ios_limit) {
2958 if (wait) {
2959 *wait = 0;
2960 }
2961
2962 return false;
2963 }
2964
2965 /* Calc approx time to dispatch */
2966 wait_time = (ios_base + 1) / iops_limit;
2967 if (wait_time > elapsed_time) {
2968 wait_time = wait_time - elapsed_time;
2969 } else {
2970 wait_time = 0;
2971 }
2972
2973 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2974 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2975 if (wait) {
2976 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2977 }
2978
2979 return true;
2980}
2981
2982static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
2983 bool is_write, int64_t *wait)
2984{
2985 int64_t now, max_wait;
2986 uint64_t bps_wait = 0, iops_wait = 0;
2987 double elapsed_time;
2988 int bps_ret, iops_ret;
2989
2990 now = qemu_get_clock_ns(vm_clock);
2991 if ((bs->slice_start < now)
2992 && (bs->slice_end > now)) {
2993 bs->slice_end = now + bs->slice_time;
2994 } else {
2995 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
2996 bs->slice_start = now;
2997 bs->slice_end = now + bs->slice_time;
2998
2999 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3000 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3001
3002 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3003 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3004 }
3005
3006 elapsed_time = now - bs->slice_start;
3007 elapsed_time /= (NANOSECONDS_PER_SECOND);
3008
3009 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3010 is_write, elapsed_time, &bps_wait);
3011 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3012 elapsed_time, &iops_wait);
3013 if (bps_ret || iops_ret) {
3014 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3015 if (wait) {
3016 *wait = max_wait;
3017 }
3018
3019 now = qemu_get_clock_ns(vm_clock);
3020 if (bs->slice_end < now + max_wait) {
3021 bs->slice_end = now + max_wait;
3022 }
3023
3024 return true;
3025 }
3026
3027 if (wait) {
3028 *wait = 0;
3029 }
3030
3031 return false;
3032}
pbrookce1a14d2006-08-07 02:38:06 +00003033
/**************************************************************/
/* async block device emulation */
3036
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003037typedef struct BlockDriverAIOCBSync {
3038 BlockDriverAIOCB common;
3039 QEMUBH *bh;
3040 int ret;
3041 /* vector translation state */
3042 QEMUIOVector *qiov;
3043 uint8_t *bounce;
3044 int is_write;
3045} BlockDriverAIOCBSync;
3046
3047static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3048{
Kevin Wolfb666d232010-05-05 11:44:39 +02003049 BlockDriverAIOCBSync *acb =
3050 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003051 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003052 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003053 qemu_aio_release(acb);
3054}
3055
3056static AIOPool bdrv_em_aio_pool = {
3057 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3058 .cancel = bdrv_aio_cancel_em,
3059};
3060
bellard83f64092006-08-01 16:21:11 +00003061static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003062{
pbrookce1a14d2006-08-07 02:38:06 +00003063 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003064
aliguorif141eaf2009-04-07 18:43:24 +00003065 if (!acb->is_write)
3066 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003067 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003068 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003069 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003070 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003071 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003072}
bellardbeac80c2006-06-26 20:08:57 +00003073
aliguorif141eaf2009-04-07 18:43:24 +00003074static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3075 int64_t sector_num,
3076 QEMUIOVector *qiov,
3077 int nb_sectors,
3078 BlockDriverCompletionFunc *cb,
3079 void *opaque,
3080 int is_write)
3081
bellardea2384d2004-08-01 21:59:26 +00003082{
pbrookce1a14d2006-08-07 02:38:06 +00003083 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003084
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003085 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003086 acb->is_write = is_write;
3087 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003088 acb->bounce = qemu_blockalign(bs, qiov->size);
aliguorif141eaf2009-04-07 18:43:24 +00003089
pbrookce1a14d2006-08-07 02:38:06 +00003090 if (!acb->bh)
3091 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003092
3093 if (is_write) {
3094 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003095 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003096 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003097 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003098 }
3099
pbrookce1a14d2006-08-07 02:38:06 +00003100 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003101
pbrookce1a14d2006-08-07 02:38:06 +00003102 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003103}
3104
aliguorif141eaf2009-04-07 18:43:24 +00003105static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3106 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003107 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003108{
aliguorif141eaf2009-04-07 18:43:24 +00003109 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003110}
3111
aliguorif141eaf2009-04-07 18:43:24 +00003112static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3113 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3114 BlockDriverCompletionFunc *cb, void *opaque)
3115{
3116 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3117}
3118
Kevin Wolf68485422011-06-30 10:05:46 +02003119
3120typedef struct BlockDriverAIOCBCoroutine {
3121 BlockDriverAIOCB common;
3122 BlockRequest req;
3123 bool is_write;
3124 QEMUBH* bh;
3125} BlockDriverAIOCBCoroutine;
3126
3127static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3128{
3129 qemu_aio_flush();
3130}
3131
3132static AIOPool bdrv_em_co_aio_pool = {
3133 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3134 .cancel = bdrv_aio_co_cancel_em,
3135};
3136
Paolo Bonzini35246a62011-10-14 10:41:29 +02003137static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003138{
3139 BlockDriverAIOCBCoroutine *acb = opaque;
3140
3141 acb->common.cb(acb->common.opaque, acb->req.error);
3142 qemu_bh_delete(acb->bh);
3143 qemu_aio_release(acb);
3144}
3145
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003146/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3147static void coroutine_fn bdrv_co_do_rw(void *opaque)
3148{
3149 BlockDriverAIOCBCoroutine *acb = opaque;
3150 BlockDriverState *bs = acb->common.bs;
3151
3152 if (!acb->is_write) {
3153 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3154 acb->req.nb_sectors, acb->req.qiov);
3155 } else {
3156 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3157 acb->req.nb_sectors, acb->req.qiov);
3158 }
3159
Paolo Bonzini35246a62011-10-14 10:41:29 +02003160 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003161 qemu_bh_schedule(acb->bh);
3162}
3163
Kevin Wolf68485422011-06-30 10:05:46 +02003164static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3165 int64_t sector_num,
3166 QEMUIOVector *qiov,
3167 int nb_sectors,
3168 BlockDriverCompletionFunc *cb,
3169 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003170 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003171{
3172 Coroutine *co;
3173 BlockDriverAIOCBCoroutine *acb;
3174
3175 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3176 acb->req.sector = sector_num;
3177 acb->req.nb_sectors = nb_sectors;
3178 acb->req.qiov = qiov;
3179 acb->is_write = is_write;
3180
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003181 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003182 qemu_coroutine_enter(co, acb);
3183
3184 return &acb->common;
3185}
3186
Paolo Bonzini07f07612011-10-17 12:32:12 +02003187static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003188{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003189 BlockDriverAIOCBCoroutine *acb = opaque;
3190 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003191
Paolo Bonzini07f07612011-10-17 12:32:12 +02003192 acb->req.error = bdrv_co_flush(bs);
3193 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003194 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003195}
3196
Paolo Bonzini07f07612011-10-17 12:32:12 +02003197BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003198 BlockDriverCompletionFunc *cb, void *opaque)
3199{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003200 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003201
Paolo Bonzini07f07612011-10-17 12:32:12 +02003202 Coroutine *co;
3203 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003204
Paolo Bonzini07f07612011-10-17 12:32:12 +02003205 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3206 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3207 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003208
Alexander Graf016f5cf2010-05-26 17:51:49 +02003209 return &acb->common;
3210}
3211
Paolo Bonzini4265d622011-10-17 12:32:14 +02003212static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3213{
3214 BlockDriverAIOCBCoroutine *acb = opaque;
3215 BlockDriverState *bs = acb->common.bs;
3216
3217 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3218 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3219 qemu_bh_schedule(acb->bh);
3220}
3221
3222BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3223 int64_t sector_num, int nb_sectors,
3224 BlockDriverCompletionFunc *cb, void *opaque)
3225{
3226 Coroutine *co;
3227 BlockDriverAIOCBCoroutine *acb;
3228
3229 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3230
3231 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3232 acb->req.sector = sector_num;
3233 acb->req.nb_sectors = nb_sectors;
3234 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3235 qemu_coroutine_enter(co, acb);
3236
3237 return &acb->common;
3238}
3239
bellardea2384d2004-08-01 21:59:26 +00003240void bdrv_init(void)
3241{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003242 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003243}
pbrookce1a14d2006-08-07 02:38:06 +00003244
Markus Armbrustereb852012009-10-27 18:41:44 +01003245void bdrv_init_with_whitelist(void)
3246{
3247 use_bdrv_whitelist = 1;
3248 bdrv_init();
3249}
3250
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003251void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3252 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003253{
pbrookce1a14d2006-08-07 02:38:06 +00003254 BlockDriverAIOCB *acb;
3255
aliguori6bbff9a2009-03-20 18:25:59 +00003256 if (pool->free_aiocb) {
3257 acb = pool->free_aiocb;
3258 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003259 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003260 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003261 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003262 }
3263 acb->bs = bs;
3264 acb->cb = cb;
3265 acb->opaque = opaque;
3266 return acb;
3267}
3268
3269void qemu_aio_release(void *p)
3270{
aliguori6bbff9a2009-03-20 18:25:59 +00003271 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3272 AIOPool *pool = acb->pool;
3273 acb->next = pool->free_aiocb;
3274 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003275}
bellard19cb3732006-08-19 11:45:59 +00003276
/**************************************************************/
/* Coroutine block device emulation */
3279
3280typedef struct CoroutineIOCompletion {
3281 Coroutine *coroutine;
3282 int ret;
3283} CoroutineIOCompletion;
3284
3285static void bdrv_co_io_em_complete(void *opaque, int ret)
3286{
3287 CoroutineIOCompletion *co = opaque;
3288
3289 co->ret = ret;
3290 qemu_coroutine_enter(co->coroutine, NULL);
3291}
3292
3293static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3294 int nb_sectors, QEMUIOVector *iov,
3295 bool is_write)
3296{
3297 CoroutineIOCompletion co = {
3298 .coroutine = qemu_coroutine_self(),
3299 };
3300 BlockDriverAIOCB *acb;
3301
3302 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003303 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3304 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003305 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003306 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3307 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003308 }
3309
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003310 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003311 if (!acb) {
3312 return -EIO;
3313 }
3314 qemu_coroutine_yield();
3315
3316 return co.ret;
3317}
3318
3319static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3320 int64_t sector_num, int nb_sectors,
3321 QEMUIOVector *iov)
3322{
3323 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3324}
3325
3326static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3327 int64_t sector_num, int nb_sectors,
3328 QEMUIOVector *iov)
3329{
3330 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3331}
3332
Paolo Bonzini07f07612011-10-17 12:32:12 +02003333static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003334{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003335 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003336
Paolo Bonzini07f07612011-10-17 12:32:12 +02003337 rwco->ret = bdrv_co_flush(rwco->bs);
3338}
3339
3340int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3341{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003342 int ret;
3343
Kevin Wolfca716362011-11-10 18:13:59 +01003344 if (!bs->drv) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003345 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003346 }
3347
Kevin Wolfca716362011-11-10 18:13:59 +01003348 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003349 if (bs->drv->bdrv_co_flush_to_os) {
3350 ret = bs->drv->bdrv_co_flush_to_os(bs);
3351 if (ret < 0) {
3352 return ret;
3353 }
3354 }
3355
Kevin Wolfca716362011-11-10 18:13:59 +01003356 /* But don't actually force it to the disk with cache=unsafe */
3357 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3358 return 0;
3359 }
3360
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003361 if (bs->drv->bdrv_co_flush_to_disk) {
Kevin Wolfc68b89a2011-11-10 17:25:44 +01003362 return bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003363 } else if (bs->drv->bdrv_aio_flush) {
3364 BlockDriverAIOCB *acb;
3365 CoroutineIOCompletion co = {
3366 .coroutine = qemu_coroutine_self(),
3367 };
3368
3369 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3370 if (acb == NULL) {
3371 return -EIO;
3372 } else {
3373 qemu_coroutine_yield();
3374 return co.ret;
3375 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003376 } else {
3377 /*
3378 * Some block drivers always operate in either writethrough or unsafe
3379 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3380 * know how the server works (because the behaviour is hardcoded or
3381 * depends on server-side configuration), so we can't ensure that
3382 * everything is safe on disk. Returning an error doesn't work because
3383 * that would break guests even if the server operates in writethrough
3384 * mode.
3385 *
3386 * Let's hope the user knows what he's doing.
3387 */
3388 return 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003389 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003390}
3391
Anthony Liguori0f154232011-11-14 15:09:45 -06003392void bdrv_invalidate_cache(BlockDriverState *bs)
3393{
3394 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3395 bs->drv->bdrv_invalidate_cache(bs);
3396 }
3397}
3398
3399void bdrv_invalidate_cache_all(void)
3400{
3401 BlockDriverState *bs;
3402
3403 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3404 bdrv_invalidate_cache(bs);
3405 }
3406}
3407
Paolo Bonzini07f07612011-10-17 12:32:12 +02003408int bdrv_flush(BlockDriverState *bs)
3409{
3410 Coroutine *co;
3411 RwCo rwco = {
3412 .bs = bs,
3413 .ret = NOT_DONE,
3414 };
3415
3416 if (qemu_in_coroutine()) {
3417 /* Fast-path if already in coroutine context */
3418 bdrv_flush_co_entry(&rwco);
3419 } else {
3420 co = qemu_coroutine_create(bdrv_flush_co_entry);
3421 qemu_coroutine_enter(co, &rwco);
3422 while (rwco.ret == NOT_DONE) {
3423 qemu_aio_wait();
3424 }
3425 }
3426
3427 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003428}
3429
Paolo Bonzini4265d622011-10-17 12:32:14 +02003430static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3431{
3432 RwCo *rwco = opaque;
3433
3434 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3435}
3436
3437int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3438 int nb_sectors)
3439{
3440 if (!bs->drv) {
3441 return -ENOMEDIUM;
3442 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3443 return -EIO;
3444 } else if (bs->read_only) {
3445 return -EROFS;
3446 } else if (bs->drv->bdrv_co_discard) {
3447 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3448 } else if (bs->drv->bdrv_aio_discard) {
3449 BlockDriverAIOCB *acb;
3450 CoroutineIOCompletion co = {
3451 .coroutine = qemu_coroutine_self(),
3452 };
3453
3454 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3455 bdrv_co_io_em_complete, &co);
3456 if (acb == NULL) {
3457 return -EIO;
3458 } else {
3459 qemu_coroutine_yield();
3460 return co.ret;
3461 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003462 } else {
3463 return 0;
3464 }
3465}
3466
3467int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3468{
3469 Coroutine *co;
3470 RwCo rwco = {
3471 .bs = bs,
3472 .sector_num = sector_num,
3473 .nb_sectors = nb_sectors,
3474 .ret = NOT_DONE,
3475 };
3476
3477 if (qemu_in_coroutine()) {
3478 /* Fast-path if already in coroutine context */
3479 bdrv_discard_co_entry(&rwco);
3480 } else {
3481 co = qemu_coroutine_create(bdrv_discard_co_entry);
3482 qemu_coroutine_enter(co, &rwco);
3483 while (rwco.ret == NOT_DONE) {
3484 qemu_aio_wait();
3485 }
3486 }
3487
3488 return rwco.ret;
3489}
3490
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003491/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003492/* removable device support */
3493
3494/**
3495 * Return TRUE if the media is present
3496 */
3497int bdrv_is_inserted(BlockDriverState *bs)
3498{
3499 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003500
bellard19cb3732006-08-19 11:45:59 +00003501 if (!drv)
3502 return 0;
3503 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003504 return 1;
3505 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003506}
3507
3508/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003509 * Return whether the media changed since the last call to this
3510 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003511 */
3512int bdrv_media_changed(BlockDriverState *bs)
3513{
3514 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003515
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003516 if (drv && drv->bdrv_media_changed) {
3517 return drv->bdrv_media_changed(bs);
3518 }
3519 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003520}
3521
3522/**
3523 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3524 */
Markus Armbrusterfdec4402011-09-06 18:58:45 +02003525void bdrv_eject(BlockDriverState *bs, int eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003526{
3527 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003528
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003529 if (drv && drv->bdrv_eject) {
3530 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003531 }
bellard19cb3732006-08-19 11:45:59 +00003532}
3533
bellard19cb3732006-08-19 11:45:59 +00003534/**
3535 * Lock or unlock the media (if it is locked, the user won't be able
3536 * to eject it manually).
3537 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003538void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003539{
3540 BlockDriver *drv = bs->drv;
3541
Markus Armbruster025e8492011-09-06 18:58:47 +02003542 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003543
Markus Armbruster025e8492011-09-06 18:58:47 +02003544 if (drv && drv->bdrv_lock_medium) {
3545 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003546 }
3547}
ths985a03b2007-12-24 16:10:43 +00003548
3549/* needed for generic scsi interface */
3550
3551int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3552{
3553 BlockDriver *drv = bs->drv;
3554
3555 if (drv && drv->bdrv_ioctl)
3556 return drv->bdrv_ioctl(bs, req, buf);
3557 return -ENOTSUP;
3558}
aliguori7d780662009-03-12 19:57:08 +00003559
aliguori221f7152009-03-28 17:28:41 +00003560BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3561 unsigned long int req, void *buf,
3562 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003563{
aliguori221f7152009-03-28 17:28:41 +00003564 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003565
aliguori221f7152009-03-28 17:28:41 +00003566 if (drv && drv->bdrv_aio_ioctl)
3567 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3568 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003569}
aliguorie268ca52009-04-22 20:20:00 +00003570
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003571void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3572{
3573 bs->buffer_alignment = align;
3574}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003575
aliguorie268ca52009-04-22 20:20:00 +00003576void *qemu_blockalign(BlockDriverState *bs, size_t size)
3577{
3578 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3579}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003580
3581void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3582{
3583 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003584
Liran Schouraaa0eb72010-01-26 10:31:48 +02003585 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003586 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003587 if (!bs->dirty_bitmap) {
3588 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3589 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3590 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003591
Anthony Liguori7267c092011-08-20 22:09:37 -05003592 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003593 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003594 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003595 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003596 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003597 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003598 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003599 }
3600}
3601
3602int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3603{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003604 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003605
Jan Kiszkac6d22832009-11-30 18:21:20 +01003606 if (bs->dirty_bitmap &&
3607 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003608 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3609 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003610 } else {
3611 return 0;
3612 }
3613}
3614
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003615void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3616 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003617{
3618 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3619}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003620
3621int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3622{
3623 return bs->dirty_count;
3624}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003625
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003626void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3627{
3628 assert(bs->in_use != in_use);
3629 bs->in_use = in_use;
3630}
3631
3632int bdrv_in_use(BlockDriverState *bs)
3633{
3634 return bs->in_use;
3635}
3636
Luiz Capitulino28a72822011-09-26 17:43:50 -03003637void bdrv_iostatus_enable(BlockDriverState *bs)
3638{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003639 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003640 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003641}
3642
3643/* The I/O status is only enabled if the drive explicitly
3644 * enables it _and_ the VM is configured to stop on errors */
3645bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3646{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003647 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003648 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3649 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3650 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3651}
3652
3653void bdrv_iostatus_disable(BlockDriverState *bs)
3654{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003655 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003656}
3657
3658void bdrv_iostatus_reset(BlockDriverState *bs)
3659{
3660 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003661 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003662 }
3663}
3664
3665/* XXX: Today this is set by device models because it makes the implementation
3666 quite simple. However, the block layer knows about the error, so it's
3667 possible to implement this without device models being involved */
3668void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3669{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003670 if (bdrv_iostatus_is_enabled(bs) &&
3671 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003672 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003673 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3674 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003675 }
3676}
3677
Christoph Hellwiga597e792011-08-25 08:26:01 +02003678void
3679bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3680 enum BlockAcctType type)
3681{
3682 assert(type < BDRV_MAX_IOTYPE);
3683
3684 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003685 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003686 cookie->type = type;
3687}
3688
3689void
3690bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3691{
3692 assert(cookie->type < BDRV_MAX_IOTYPE);
3693
3694 bs->nr_bytes[cookie->type] += cookie->bytes;
3695 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003696 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003697}
3698
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003699int bdrv_img_create(const char *filename, const char *fmt,
3700 const char *base_filename, const char *base_fmt,
3701 char *options, uint64_t img_size, int flags)
3702{
3703 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003704 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003705 BlockDriverState *bs = NULL;
3706 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003707 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003708 int ret = 0;
3709
3710 /* Find driver and parse its options */
3711 drv = bdrv_find_format(fmt);
3712 if (!drv) {
3713 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003714 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003715 goto out;
3716 }
3717
3718 proto_drv = bdrv_find_protocol(filename);
3719 if (!proto_drv) {
3720 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003721 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003722 goto out;
3723 }
3724
3725 create_options = append_option_parameters(create_options,
3726 drv->create_options);
3727 create_options = append_option_parameters(create_options,
3728 proto_drv->create_options);
3729
3730 /* Create parameter list with default values */
3731 param = parse_option_parameters("", create_options, param);
3732
3733 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3734
3735 /* Parse -o options */
3736 if (options) {
3737 param = parse_option_parameters(options, create_options, param);
3738 if (param == NULL) {
3739 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003740 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003741 goto out;
3742 }
3743 }
3744
3745 if (base_filename) {
3746 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3747 base_filename)) {
3748 error_report("Backing file not supported for file format '%s'",
3749 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003750 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003751 goto out;
3752 }
3753 }
3754
3755 if (base_fmt) {
3756 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3757 error_report("Backing file format not supported for file "
3758 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003759 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003760 goto out;
3761 }
3762 }
3763
Jes Sorensen792da932010-12-16 13:52:17 +01003764 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3765 if (backing_file && backing_file->value.s) {
3766 if (!strcmp(filename, backing_file->value.s)) {
3767 error_report("Error: Trying to create an image with the "
3768 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003769 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003770 goto out;
3771 }
3772 }
3773
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003774 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3775 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003776 backing_drv = bdrv_find_format(backing_fmt->value.s);
3777 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003778 error_report("Unknown backing file format '%s'",
3779 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003780 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003781 goto out;
3782 }
3783 }
3784
3785 // The size for the image must always be specified, with one exception:
3786 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02003787 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3788 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003789 if (backing_file && backing_file->value.s) {
3790 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003791 char buf[32];
3792
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003793 bs = bdrv_new("");
3794
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003795 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003796 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003797 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003798 goto out;
3799 }
3800 bdrv_get_geometry(bs, &size);
3801 size *= 512;
3802
3803 snprintf(buf, sizeof(buf), "%" PRId64, size);
3804 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3805 } else {
3806 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003807 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003808 goto out;
3809 }
3810 }
3811
3812 printf("Formatting '%s', fmt=%s ", filename, fmt);
3813 print_option_parameters(param);
3814 puts("");
3815
3816 ret = bdrv_create(drv, filename, param);
3817
3818 if (ret < 0) {
3819 if (ret == -ENOTSUP) {
3820 error_report("Formatting or formatting option not supported for "
3821 "file format '%s'", fmt);
3822 } else if (ret == -EFBIG) {
3823 error_report("The image size is too large for file format '%s'",
3824 fmt);
3825 } else {
3826 error_report("%s: error while creating %s: %s", filename, fmt,
3827 strerror(-ret));
3828 }
3829 }
3830
3831out:
3832 free_option_parameters(create_options);
3833 free_option_parameters(param);
3834
3835 if (bs) {
3836 bdrv_delete(bs);
3837 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01003838
3839 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003840}