blob: ecc5b442d475977813f1b3863c4cb067d580104b [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010049#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020051static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000052static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
53 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000054 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000055static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
56 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000057 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020058static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
59 int64_t sector_num, int nb_sectors,
60 QEMUIOVector *iov);
61static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
62 int64_t sector_num, int nb_sectors,
63 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010064static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010066static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010068static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
69 int64_t sector_num,
70 QEMUIOVector *qiov,
71 int nb_sectors,
72 BlockDriverCompletionFunc *cb,
73 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010074 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000076
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080077static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
78 bool is_write, double elapsed_time, uint64_t *wait);
79static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
80 double elapsed_time, uint64_t *wait);
81static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
82 bool is_write, int64_t *wait);
83
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010084static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
85 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000086
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010087static QLIST_HEAD(, BlockDriver) bdrv_drivers =
88 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000089
Markus Armbrusterf9092b12010-06-25 10:33:39 +020090/* The device to use for VM snapshots */
91static BlockDriverState *bs_snapshots;
92
Markus Armbrustereb852012009-10-27 18:41:44 +010093/* If non-zero, use only whitelisted block drivers */
94static int use_bdrv_whitelist;
95
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +000096#ifdef _WIN32
/*
 * Return 1 if filename starts with an ASCII letter immediately followed by
 * ':' (for example "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    const int is_letter = (first >= 'a' && first <= 'z') ||
                          (first >= 'A' && first <= 'Z');

    /* filename[1] is only inspected once filename[0] is known to be a letter */
    return is_letter && filename[1] == ':';
}
103
104int is_windows_drive(const char *filename)
105{
106 if (is_windows_drive_prefix(filename) &&
107 filename[2] == '\0')
108 return 1;
109 if (strstart(filename, "\\\\.\\", NULL) ||
110 strstart(filename, "//./", NULL))
111 return 1;
112 return 0;
113}
114#endif
115
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800116/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800117void bdrv_io_limits_disable(BlockDriverState *bs)
118{
119 bs->io_limits_enabled = false;
120
121 while (qemu_co_queue_next(&bs->throttled_reqs));
122
123 if (bs->block_timer) {
124 qemu_del_timer(bs->block_timer);
125 qemu_free_timer(bs->block_timer);
126 bs->block_timer = NULL;
127 }
128
129 bs->slice_start = 0;
130 bs->slice_end = 0;
131 bs->slice_time = 0;
132 memset(&bs->io_base, 0, sizeof(bs->io_base));
133}
134
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800135static void bdrv_block_timer(void *opaque)
136{
137 BlockDriverState *bs = opaque;
138
139 qemu_co_queue_next(&bs->throttled_reqs);
140}
141
142void bdrv_io_limits_enable(BlockDriverState *bs)
143{
144 qemu_co_queue_init(&bs->throttled_reqs);
145 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
146 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
147 bs->slice_start = qemu_get_clock_ns(vm_clock);
148 bs->slice_end = bs->slice_start + bs->slice_time;
149 memset(&bs->io_base, 0, sizeof(bs->io_base));
150 bs->io_limits_enabled = true;
151}
152
153bool bdrv_io_limits_enabled(BlockDriverState *bs)
154{
155 BlockIOLimit *io_limits = &bs->io_limits;
156 return io_limits->bps[BLOCK_IO_LIMIT_READ]
157 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
158 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
159 || io_limits->iops[BLOCK_IO_LIMIT_READ]
160 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
161 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
162}
163
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800164static void bdrv_io_limits_intercept(BlockDriverState *bs,
165 bool is_write, int nb_sectors)
166{
167 int64_t wait_time = -1;
168
169 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
170 qemu_co_queue_wait(&bs->throttled_reqs);
171 }
172
173 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
174 * throttled requests will not be dequeued until the current request is
175 * allowed to be serviced. So if the current request still exceeds the
176 * limits, it will be inserted to the head. All requests followed it will
177 * be still in throttled_reqs queue.
178 */
179
180 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
181 qemu_mod_timer(bs->block_timer,
182 wait_time + qemu_get_clock_ns(vm_clock));
183 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
184 }
185
186 qemu_co_queue_next(&bs->throttled_reqs);
187}
188
/*
 * Check whether path starts with "<protocol>:".
 *
 * Returns 1 if path contains a ':' and, on Windows hosts, is not a drive
 * name or a drive-letter prefixed path; returns 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* "c:" and "c:\dir" style names contain a colon but no protocol */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    if (strchr(path, ':') == NULL) {
        return 0;
    }
    return 1;
}
201
/*
 * Return non-zero if path is absolute.
 *
 * The part examined is whatever follows the first ':' in path (or the whole
 * path when there is no ':'); it is absolute when it begins with '/'.  On
 * Windows hosts '\\' is accepted as well, and a path that itself begins
 * with '/' or '\\' is always absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
221
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the location of filename relative to base_path.
 *
 * If filename is absolute it is copied unchanged.  Otherwise the directory
 * part of base_path -- everything up to and including the last path
 * separator, or up to and including the first ':' when that comes later --
 * is prepended to filename.  URL are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon;
    const char *after_sep;
    const char *dir_end;
    int dir_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' (or start of string) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last path separator (or start of string) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* the directory part ends at whichever of the two lies further right */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;

    dir_len = dir_end - base_path;
    if (dir_len > dest_size - 1) {
        dir_len = dest_size - 1;
    }
    memcpy(dest, base_path, dir_len);
    dest[dir_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
265
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500266void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000267{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100268 /* Block drivers without coroutine functions need emulation */
269 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200270 bdrv->bdrv_co_readv = bdrv_co_readv_em;
271 bdrv->bdrv_co_writev = bdrv_co_writev_em;
272
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100273 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
274 * the block driver lacks aio we need to emulate that too.
275 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200276 if (!bdrv->bdrv_aio_readv) {
277 /* add AIO emulation layer */
278 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
279 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200280 }
bellard83f64092006-08-01 16:21:11 +0000281 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200282
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100283 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000284}
bellardb3380822004-03-14 21:38:54 +0000285
286/* create a new block device (by default it is empty) */
287BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000288{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100289 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000290
Anthony Liguori7267c092011-08-20 22:09:37 -0500291 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000292 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000293 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100294 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000295 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300296 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000297 return bs;
298}
299
bellardea2384d2004-08-01 21:59:26 +0000300BlockDriver *bdrv_find_format(const char *format_name)
301{
302 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100303 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
304 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000305 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100306 }
bellardea2384d2004-08-01 21:59:26 +0000307 }
308 return NULL;
309}
310
Markus Armbrustereb852012009-10-27 18:41:44 +0100311static int bdrv_is_whitelisted(BlockDriver *drv)
312{
313 static const char *whitelist[] = {
314 CONFIG_BDRV_WHITELIST
315 };
316 const char **p;
317
318 if (!whitelist[0])
319 return 1; /* no whitelist, anything goes */
320
321 for (p = whitelist; *p; p++) {
322 if (!strcmp(drv->format_name, *p)) {
323 return 1;
324 }
325 }
326 return 0;
327}
328
329BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
330{
331 BlockDriver *drv = bdrv_find_format(format_name);
332 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
333}
334
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200335int bdrv_create(BlockDriver *drv, const char* filename,
336 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000337{
338 if (!drv->bdrv_create)
339 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200340
341 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000342}
343
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200344int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
345{
346 BlockDriver *drv;
347
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900348 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200349 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000350 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351 }
352
353 return bdrv_create(drv, filename, options);
354}
355
/*
 * Produce the name of a freshly created temporary file.
 *
 * filename: output buffer receiving the NUL-terminated path
 * size:     capacity of filename in bytes
 *
 * The file is created on disk (and closed again) so that the name is
 * reserved.  If no temporary file could be created, filename is set to the
 * empty string so that callers cannot go on to use a predictable or
 * half-built path.
 */
#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];
    DWORD dir_len;

    /* GetTempFileName() takes no length and may write up to MAX_PATH chars */
    assert(size >= MAX_PATH);

    /* a result of 0 is failure; a result above the buffer size is truncation */
    dir_len = GetTempPath(MAX_PATH, temp_dir);
    if (dir_len == 0 || dir_len > MAX_PATH ||
        GetTempFileName(temp_dir, "qem", 0, filename) == 0) {
        filename[0] = '\0';
    }
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    if (fd < 0) {
        /* also reached when the template was truncated by snprintf() */
        if (size > 0) {
            filename[0] = '\0';
        }
        return;
    }
    close(fd);
}
#endif
bellardea2384d2004-08-01 21:59:26 +0000378
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200379/*
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
382 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200383static BlockDriver *find_hdev_driver(const char *filename)
384{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200385 int score_max = 0, score;
386 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200387
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100388 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200389 if (d->bdrv_probe_device) {
390 score = d->bdrv_probe_device(filename);
391 if (score > score_max) {
392 score_max = score;
393 drv = d;
394 }
395 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200396 }
397
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200398 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200399}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200400
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900401BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200402{
403 BlockDriver *drv1;
404 char protocol[128];
405 int len;
406 const char *p;
407
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200408 /* TODO Drivers without bdrv_file_open must be specified explicitly */
409
Christoph Hellwig39508e72010-06-23 12:25:17 +0200410 /*
411 * XXX(hch): we really should not let host device detection
412 * override an explicit protocol specification, but moving this
413 * later breaks access to device names with colons in them.
414 * Thanks to the brain-dead persistent naming schemes on udev-
415 * based Linux systems those actually are quite common.
416 */
417 drv1 = find_hdev_driver(filename);
418 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200419 return drv1;
420 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200421
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000422 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200423 return bdrv_find_format("file");
424 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000425 p = strchr(filename, ':');
426 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200427 len = p - filename;
428 if (len > sizeof(protocol) - 1)
429 len = sizeof(protocol) - 1;
430 memcpy(protocol, filename, len);
431 protocol[len] = '\0';
432 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
433 if (drv1->protocol_name &&
434 !strcmp(drv1->protocol_name, protocol)) {
435 return drv1;
436 }
437 }
438 return NULL;
439}
440
Stefan Weilc98ac352010-07-21 21:51:51 +0200441static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000442{
bellard83f64092006-08-01 16:21:11 +0000443 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000444 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000445 uint8_t buf[2048];
446 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000447
Naphtali Spreif5edb012010-01-17 16:48:13 +0200448 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200449 if (ret < 0) {
450 *pdrv = NULL;
451 return ret;
452 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700453
Kevin Wolf08a00552010-06-01 18:37:31 +0200454 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
455 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700456 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200457 drv = bdrv_find_format("raw");
458 if (!drv) {
459 ret = -ENOENT;
460 }
461 *pdrv = drv;
462 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700464
bellard83f64092006-08-01 16:21:11 +0000465 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
466 bdrv_delete(bs);
467 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200468 *pdrv = NULL;
469 return ret;
bellard83f64092006-08-01 16:21:11 +0000470 }
471
bellardea2384d2004-08-01 21:59:26 +0000472 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200473 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100474 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000475 if (drv1->bdrv_probe) {
476 score = drv1->bdrv_probe(buf, ret, filename);
477 if (score > score_max) {
478 score_max = score;
479 drv = drv1;
480 }
bellardea2384d2004-08-01 21:59:26 +0000481 }
482 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200483 if (!drv) {
484 ret = -ENOENT;
485 }
486 *pdrv = drv;
487 return ret;
bellardea2384d2004-08-01 21:59:26 +0000488}
489
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100490/**
491 * Set the current 'total_sectors' value
492 */
493static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
494{
495 BlockDriver *drv = bs->drv;
496
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700497 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
498 if (bs->sg)
499 return 0;
500
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100501 /* query actual device if possible, otherwise just trust the hint */
502 if (drv->bdrv_getlength) {
503 int64_t length = drv->bdrv_getlength(bs);
504 if (length < 0) {
505 return length;
506 }
507 hint = length >> BDRV_SECTOR_BITS;
508 }
509
510 bs->total_sectors = hint;
511 return 0;
512}
513
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100514/**
515 * Set open flags for a given cache mode
516 *
517 * Return 0 on success, -1 if the cache mode was invalid.
518 */
519int bdrv_parse_cache_flags(const char *mode, int *flags)
520{
521 *flags &= ~BDRV_O_CACHE_MASK;
522
523 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
524 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100525 } else if (!strcmp(mode, "directsync")) {
526 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100527 } else if (!strcmp(mode, "writeback")) {
528 *flags |= BDRV_O_CACHE_WB;
529 } else if (!strcmp(mode, "unsafe")) {
530 *flags |= BDRV_O_CACHE_WB;
531 *flags |= BDRV_O_NO_FLUSH;
532 } else if (!strcmp(mode, "writethrough")) {
533 /* this is the default */
534 } else {
535 return -1;
536 }
537
538 return 0;
539}
540
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000541/**
542 * The copy-on-read flag is actually a reference count so multiple users may
543 * use the feature without worrying about clobbering its previous state.
544 * Copy-on-read stays enabled until all users have called to disable it.
545 */
546void bdrv_enable_copy_on_read(BlockDriverState *bs)
547{
548 bs->copy_on_read++;
549}
550
551void bdrv_disable_copy_on_read(BlockDriverState *bs)
552{
553 assert(bs->copy_on_read > 0);
554 bs->copy_on_read--;
555}
556
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200557/*
Kevin Wolf57915332010-04-14 15:24:50 +0200558 * Common part for opening disk images and files
559 */
560static int bdrv_open_common(BlockDriverState *bs, const char *filename,
561 int flags, BlockDriver *drv)
562{
563 int ret, open_flags;
564
565 assert(drv != NULL);
566
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100567 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
568
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200569 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100570 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200571 bs->encrypted = 0;
572 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100573 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200574 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100575 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200576 bs->buffer_alignment = 512;
577
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000578 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
579 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
580 bdrv_enable_copy_on_read(bs);
581 }
582
Kevin Wolf57915332010-04-14 15:24:50 +0200583 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100584 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200585
586 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
587 return -ENOTSUP;
588 }
589
590 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500591 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200592
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100593 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200594
595 /*
596 * Clear flags that are internal to the block layer before opening the
597 * image.
598 */
599 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
600
601 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200602 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200603 */
604 if (bs->is_temporary) {
605 open_flags |= BDRV_O_RDWR;
606 }
607
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100608 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
609
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200610 /* Open the image, either directly or using a protocol */
611 if (drv->bdrv_file_open) {
612 ret = drv->bdrv_file_open(bs, filename, open_flags);
613 } else {
614 ret = bdrv_file_open(&bs->file, filename, open_flags);
615 if (ret >= 0) {
616 ret = drv->bdrv_open(bs, open_flags);
617 }
618 }
619
Kevin Wolf57915332010-04-14 15:24:50 +0200620 if (ret < 0) {
621 goto free_and_fail;
622 }
623
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100624 ret = refresh_total_sectors(bs, bs->total_sectors);
625 if (ret < 0) {
626 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200627 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100628
Kevin Wolf57915332010-04-14 15:24:50 +0200629#ifndef _WIN32
630 if (bs->is_temporary) {
631 unlink(filename);
632 }
633#endif
634 return 0;
635
636free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200637 if (bs->file) {
638 bdrv_delete(bs->file);
639 bs->file = NULL;
640 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500641 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200642 bs->opaque = NULL;
643 bs->drv = NULL;
644 return ret;
645}
646
647/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200648 * Opens a file using a protocol (file, host_device, nbd, ...)
649 */
bellard83f64092006-08-01 16:21:11 +0000650int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000651{
bellard83f64092006-08-01 16:21:11 +0000652 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200653 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000654 int ret;
655
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900656 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200657 if (!drv) {
658 return -ENOENT;
659 }
660
bellard83f64092006-08-01 16:21:11 +0000661 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200662 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000663 if (ret < 0) {
664 bdrv_delete(bs);
665 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000666 }
aliguori71d07702009-03-03 17:37:16 +0000667 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000668 *pbs = bs;
669 return 0;
bellardea2384d2004-08-01 21:59:26 +0000670}
bellardfc01f7e2003-06-30 10:03:06 +0000671
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200672/*
673 * Opens a disk image (raw, qcow2, vmdk, ...)
674 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200675int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
676 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000677{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200678 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200679 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000680
bellard83f64092006-08-01 16:21:11 +0000681 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000682 BlockDriverState *bs1;
683 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000684 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200685 BlockDriver *bdrv_qcow2;
686 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200687 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000688
bellardea2384d2004-08-01 21:59:26 +0000689 /* if snapshot, we create a temporary backing file and open it
690 instead of opening 'filename' directly */
691
692 /* if there is a backing file, use it */
693 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200694 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000695 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000696 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000697 return ret;
bellardea2384d2004-08-01 21:59:26 +0000698 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200699 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000700
701 if (bs1->drv && bs1->drv->protocol_name)
702 is_protocol = 1;
703
bellardea2384d2004-08-01 21:59:26 +0000704 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000705
bellardea2384d2004-08-01 21:59:26 +0000706 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000707
708 /* Real path is meaningless for protocols */
709 if (is_protocol)
710 snprintf(backing_filename, sizeof(backing_filename),
711 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000712 else if (!realpath(filename, backing_filename))
713 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000714
Kevin Wolf91a073a2009-05-27 14:48:06 +0200715 bdrv_qcow2 = bdrv_find_format("qcow2");
716 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
717
Jes Sorensen3e829902010-05-27 16:20:30 +0200718 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200719 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
720 if (drv) {
721 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
722 drv->format_name);
723 }
724
725 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200726 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000727 if (ret < 0) {
728 return ret;
bellardea2384d2004-08-01 21:59:26 +0000729 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200730
bellardea2384d2004-08-01 21:59:26 +0000731 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200732 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000733 bs->is_temporary = 1;
734 }
bellard712e7872005-04-28 21:09:32 +0000735
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200736 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200737 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200738 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000739 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100740
aliguori51d7c002009-03-05 23:00:29 +0000741 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000742 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000743 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200744
745 /* Open the image */
746 ret = bdrv_open_common(bs, filename, flags, drv);
747 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100748 goto unlink_and_fail;
749 }
750
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751 /* If there is a backing file, use it */
752 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
753 char backing_filename[PATH_MAX];
754 int back_flags;
755 BlockDriver *back_drv = NULL;
756
757 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000758
759 if (path_has_protocol(bs->backing_file)) {
760 pstrcpy(backing_filename, sizeof(backing_filename),
761 bs->backing_file);
762 } else {
763 path_combine(backing_filename, sizeof(backing_filename),
764 filename, bs->backing_file);
765 }
766
767 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200768 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000769 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200770
771 /* backing files always opened read-only */
772 back_flags =
773 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
774
775 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
776 if (ret < 0) {
777 bdrv_close(bs);
778 return ret;
779 }
780 if (bs->is_temporary) {
781 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
782 } else {
783 /* base image inherits from "parent" */
784 bs->backing_hd->keep_read_only = bs->keep_read_only;
785 }
786 }
787
788 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200789 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200790 }
791
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800792 /* throttling disk I/O limits */
793 if (bs->io_limits_enabled) {
794 bdrv_io_limits_enable(bs);
795 }
796
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200797 return 0;
798
799unlink_and_fail:
800 if (bs->is_temporary) {
801 unlink(filename);
802 }
803 return ret;
804}
805
bellardfc01f7e2003-06-30 10:03:06 +0000806void bdrv_close(BlockDriverState *bs)
807{
bellard19cb3732006-08-19 11:45:59 +0000808 if (bs->drv) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200809 if (bs == bs_snapshots) {
810 bs_snapshots = NULL;
811 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100812 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000813 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100814 bs->backing_hd = NULL;
815 }
bellardea2384d2004-08-01 21:59:26 +0000816 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500817 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000818#ifdef _WIN32
819 if (bs->is_temporary) {
820 unlink(bs->filename);
821 }
bellard67b915a2004-03-31 23:37:16 +0000822#endif
bellardea2384d2004-08-01 21:59:26 +0000823 bs->opaque = NULL;
824 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000825 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000826
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200827 if (bs->file != NULL) {
828 bdrv_close(bs->file);
829 }
830
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200831 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000832 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800833
834 /*throttling disk I/O limits*/
835 if (bs->io_limits_enabled) {
836 bdrv_io_limits_disable(bs);
837 }
bellardb3380822004-03-14 21:38:54 +0000838}
839
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900840void bdrv_close_all(void)
841{
842 BlockDriverState *bs;
843
844 QTAILQ_FOREACH(bs, &bdrv_states, list) {
845 bdrv_close(bs);
846 }
847}
848
Ryan Harperd22b2f42011-03-29 20:51:47 -0500849/* make a BlockDriverState anonymous by removing from bdrv_state list.
850 Also, NULL terminate the device_name to prevent double remove */
851void bdrv_make_anon(BlockDriverState *bs)
852{
853 if (bs->device_name[0] != '\0') {
854 QTAILQ_REMOVE(&bdrv_states, bs, list);
855 }
856 bs->device_name[0] = '\0';
857}
858
bellardb3380822004-03-14 21:38:54 +0000859void bdrv_delete(BlockDriverState *bs)
860{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200861 assert(!bs->dev);
Markus Armbruster18846de2010-06-29 16:58:30 +0200862
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100863 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500864 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000865
bellardb3380822004-03-14 21:38:54 +0000866 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200867 if (bs->file != NULL) {
868 bdrv_delete(bs->file);
869 }
870
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200871 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500872 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000873}
874
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200875int bdrv_attach_dev(BlockDriverState *bs, void *dev)
876/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200877{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200878 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +0200879 return -EBUSY;
880 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200881 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -0300882 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +0200883 return 0;
884}
885
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200886/* TODO qdevified devices don't use this, remove when devices are qdevified */
887void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +0200888{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200889 if (bdrv_attach_dev(bs, dev) < 0) {
890 abort();
891 }
892}
893
894void bdrv_detach_dev(BlockDriverState *bs, void *dev)
895/* TODO change to DeviceState *dev when all users are qdevified */
896{
897 assert(bs->dev == dev);
898 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +0200899 bs->dev_ops = NULL;
900 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +0200901 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +0200902}
903
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200904/* TODO change to return DeviceState * when all users are qdevified */
905void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +0200906{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200907 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +0200908}
909
Markus Armbruster0e49de52011-08-03 15:07:41 +0200910void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
911 void *opaque)
912{
913 bs->dev_ops = ops;
914 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200915 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
916 bs_snapshots = NULL;
917 }
Markus Armbruster0e49de52011-08-03 15:07:41 +0200918}
919
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200920static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +0200921{
Markus Armbruster145feb12011-08-03 15:07:42 +0200922 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200923 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Markus Armbruster145feb12011-08-03 15:07:42 +0200924 }
925}
926
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200927bool bdrv_dev_has_removable_media(BlockDriverState *bs)
928{
929 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
930}
931
Paolo Bonzini025ccaa2011-11-07 17:50:13 +0100932void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
933{
934 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
935 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
936 }
937}
938
Markus Armbrustere4def802011-09-06 18:58:53 +0200939bool bdrv_dev_is_tray_open(BlockDriverState *bs)
940{
941 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
942 return bs->dev_ops->is_tray_open(bs->dev_opaque);
943 }
944 return false;
945}
946
Markus Armbruster145feb12011-08-03 15:07:42 +0200947static void bdrv_dev_resize_cb(BlockDriverState *bs)
948{
949 if (bs->dev_ops && bs->dev_ops->resize_cb) {
950 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +0200951 }
952}
953
Markus Armbrusterf1076392011-09-06 18:58:46 +0200954bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
955{
956 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
957 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
958 }
959 return false;
960}
961
aliguorie97fc192009-04-21 23:11:50 +0000962/*
963 * Run consistency checks on an image
964 *
Kevin Wolfe076f332010-06-29 11:43:13 +0200965 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +0200966 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +0200967 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +0000968 */
Kevin Wolfe076f332010-06-29 11:43:13 +0200969int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +0000970{
971 if (bs->drv->bdrv_check == NULL) {
972 return -ENOTSUP;
973 }
974
Kevin Wolfe076f332010-06-29 11:43:13 +0200975 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +0200976 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +0000977}
978
Kevin Wolf8a426612010-07-16 17:17:01 +0200979#define COMMIT_BUF_SECTORS 2048
980
bellard33e39632003-07-06 17:15:21 +0000981/* commit COW file into the raw image */
982int bdrv_commit(BlockDriverState *bs)
983{
bellard19cb3732006-08-19 11:45:59 +0000984 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +0200985 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +0200986 int64_t sector, total_sectors;
987 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200988 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +0200989 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200990 char filename[1024];
991 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +0000992
bellard19cb3732006-08-19 11:45:59 +0000993 if (!drv)
994 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200995
996 if (!bs->backing_hd) {
997 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +0000998 }
999
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001000 if (bs->backing_hd->keep_read_only) {
1001 return -EACCES;
1002 }
Kevin Wolfee181192010-08-05 13:05:22 +02001003
1004 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001005 ro = bs->backing_hd->read_only;
1006 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1007 open_flags = bs->backing_hd->open_flags;
1008
1009 if (ro) {
1010 /* re-open as RW */
1011 bdrv_delete(bs->backing_hd);
1012 bs->backing_hd = NULL;
1013 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001014 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1015 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001016 if (rw_ret < 0) {
1017 bdrv_delete(bs_rw);
1018 /* try to re-open read-only */
1019 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001020 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1021 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001022 if (ret < 0) {
1023 bdrv_delete(bs_ro);
1024 /* drive not functional anymore */
1025 bs->drv = NULL;
1026 return ret;
1027 }
1028 bs->backing_hd = bs_ro;
1029 return rw_ret;
1030 }
1031 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001032 }
bellardea2384d2004-08-01 21:59:26 +00001033
Jan Kiszka6ea44302009-11-30 18:21:19 +01001034 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001035 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001036
Kevin Wolf8a426612010-07-16 17:17:01 +02001037 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001038 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001039
1040 if (bdrv_read(bs, sector, buf, n) != 0) {
1041 ret = -EIO;
1042 goto ro_cleanup;
1043 }
1044
1045 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1046 ret = -EIO;
1047 goto ro_cleanup;
1048 }
bellardea2384d2004-08-01 21:59:26 +00001049 }
1050 }
bellard95389c82005-12-18 18:28:15 +00001051
Christoph Hellwig1d449522010-01-17 12:32:30 +01001052 if (drv->bdrv_make_empty) {
1053 ret = drv->bdrv_make_empty(bs);
1054 bdrv_flush(bs);
1055 }
bellard95389c82005-12-18 18:28:15 +00001056
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001057 /*
1058 * Make sure all data we wrote to the backing device is actually
1059 * stable on disk.
1060 */
1061 if (bs->backing_hd)
1062 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001063
1064ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001065 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001066
1067 if (ro) {
1068 /* re-open as RO */
1069 bdrv_delete(bs->backing_hd);
1070 bs->backing_hd = NULL;
1071 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001072 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1073 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001074 if (ret < 0) {
1075 bdrv_delete(bs_ro);
1076 /* drive not functional anymore */
1077 bs->drv = NULL;
1078 return ret;
1079 }
1080 bs->backing_hd = bs_ro;
1081 bs->backing_hd->keep_read_only = 0;
1082 }
1083
Christoph Hellwig1d449522010-01-17 12:32:30 +01001084 return ret;
bellard33e39632003-07-06 17:15:21 +00001085}
1086
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001087void bdrv_commit_all(void)
1088{
1089 BlockDriverState *bs;
1090
1091 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1092 bdrv_commit(bs);
1093 }
1094}
1095
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001096struct BdrvTrackedRequest {
1097 BlockDriverState *bs;
1098 int64_t sector_num;
1099 int nb_sectors;
1100 bool is_write;
1101 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001102 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001103};
1104
1105/**
1106 * Remove an active request from the tracked requests list
1107 *
1108 * This function should be called when a tracked request is completing.
1109 */
1110static void tracked_request_end(BdrvTrackedRequest *req)
1111{
1112 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001113 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001114}
1115
1116/**
1117 * Add an active request to the tracked requests list
1118 */
1119static void tracked_request_begin(BdrvTrackedRequest *req,
1120 BlockDriverState *bs,
1121 int64_t sector_num,
1122 int nb_sectors, bool is_write)
1123{
1124 *req = (BdrvTrackedRequest){
1125 .bs = bs,
1126 .sector_num = sector_num,
1127 .nb_sectors = nb_sectors,
1128 .is_write = is_write,
1129 };
1130
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001131 qemu_co_queue_init(&req->wait_queue);
1132
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001133 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1134}
1135
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001136/**
1137 * Round a region to cluster boundaries
1138 */
1139static void round_to_clusters(BlockDriverState *bs,
1140 int64_t sector_num, int nb_sectors,
1141 int64_t *cluster_sector_num,
1142 int *cluster_nb_sectors)
1143{
1144 BlockDriverInfo bdi;
1145
1146 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1147 *cluster_sector_num = sector_num;
1148 *cluster_nb_sectors = nb_sectors;
1149 } else {
1150 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1151 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1152 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1153 nb_sectors, c);
1154 }
1155}
1156
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001157static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1158 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001159 /* aaaa bbbb */
1160 if (sector_num >= req->sector_num + req->nb_sectors) {
1161 return false;
1162 }
1163 /* bbbb aaaa */
1164 if (req->sector_num >= sector_num + nb_sectors) {
1165 return false;
1166 }
1167 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001168}
1169
1170static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1171 int64_t sector_num, int nb_sectors)
1172{
1173 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001174 int64_t cluster_sector_num;
1175 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001176 bool retry;
1177
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001178 /* If we touch the same cluster it counts as an overlap. This guarantees
1179 * that allocating writes will be serialized and not race with each other
1180 * for the same cluster. For example, in copy-on-read it ensures that the
1181 * CoR read and write operations are atomic and guest writes cannot
1182 * interleave between them.
1183 */
1184 round_to_clusters(bs, sector_num, nb_sectors,
1185 &cluster_sector_num, &cluster_nb_sectors);
1186
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001187 do {
1188 retry = false;
1189 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001190 if (tracked_request_overlaps(req, cluster_sector_num,
1191 cluster_nb_sectors)) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001192 qemu_co_queue_wait(&req->wait_queue);
1193 retry = true;
1194 break;
1195 }
1196 }
1197 } while (retry);
1198}
1199
Kevin Wolf756e6732010-01-12 12:55:17 +01001200/*
1201 * Return values:
1202 * 0 - success
1203 * -EINVAL - backing format specified, but no file
1204 * -ENOSPC - can't update the backing file because no space is left in the
1205 * image file header
1206 * -ENOTSUP - format driver doesn't support changing the backing file
1207 */
1208int bdrv_change_backing_file(BlockDriverState *bs,
1209 const char *backing_file, const char *backing_fmt)
1210{
1211 BlockDriver *drv = bs->drv;
1212
1213 if (drv->bdrv_change_backing_file != NULL) {
1214 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1215 } else {
1216 return -ENOTSUP;
1217 }
1218}
1219
aliguori71d07702009-03-03 17:37:16 +00001220static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1221 size_t size)
1222{
1223 int64_t len;
1224
1225 if (!bdrv_is_inserted(bs))
1226 return -ENOMEDIUM;
1227
1228 if (bs->growable)
1229 return 0;
1230
1231 len = bdrv_getlength(bs);
1232
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001233 if (offset < 0)
1234 return -EIO;
1235
1236 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001237 return -EIO;
1238
1239 return 0;
1240}
1241
1242static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1243 int nb_sectors)
1244{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001245 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1246 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001247}
1248
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001249typedef struct RwCo {
1250 BlockDriverState *bs;
1251 int64_t sector_num;
1252 int nb_sectors;
1253 QEMUIOVector *qiov;
1254 bool is_write;
1255 int ret;
1256} RwCo;
1257
1258static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1259{
1260 RwCo *rwco = opaque;
1261
1262 if (!rwco->is_write) {
1263 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1264 rwco->nb_sectors, rwco->qiov);
1265 } else {
1266 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1267 rwco->nb_sectors, rwco->qiov);
1268 }
1269}
1270
1271/*
1272 * Process a synchronous request using coroutines
1273 */
1274static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1275 int nb_sectors, bool is_write)
1276{
1277 QEMUIOVector qiov;
1278 struct iovec iov = {
1279 .iov_base = (void *)buf,
1280 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1281 };
1282 Coroutine *co;
1283 RwCo rwco = {
1284 .bs = bs,
1285 .sector_num = sector_num,
1286 .nb_sectors = nb_sectors,
1287 .qiov = &qiov,
1288 .is_write = is_write,
1289 .ret = NOT_DONE,
1290 };
1291
1292 qemu_iovec_init_external(&qiov, &iov, 1);
1293
1294 if (qemu_in_coroutine()) {
1295 /* Fast-path if already in coroutine context */
1296 bdrv_rw_co_entry(&rwco);
1297 } else {
1298 co = qemu_coroutine_create(bdrv_rw_co_entry);
1299 qemu_coroutine_enter(co, &rwco);
1300 while (rwco.ret == NOT_DONE) {
1301 qemu_aio_wait();
1302 }
1303 }
1304 return rwco.ret;
1305}
1306
bellard19cb3732006-08-19 11:45:59 +00001307/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001308int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001309 uint8_t *buf, int nb_sectors)
1310{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001311 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001312}
1313
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001314static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001315 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001316{
1317 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001318 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001319
Jan Kiszka6ea44302009-11-30 18:21:19 +01001320 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001321 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001322
1323 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001324 idx = start / (sizeof(unsigned long) * 8);
1325 bit = start % (sizeof(unsigned long) * 8);
1326 val = bs->dirty_bitmap[idx];
1327 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001328 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001329 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001330 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001331 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001332 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001333 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001334 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001335 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001336 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001337 }
1338 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001339 }
1340}
1341
ths5fafdf22007-09-16 21:08:06 +00001342/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001343 -EIO generic I/O error (may happen for all errors)
1344 -ENOMEDIUM No media inserted.
1345 -EINVAL Invalid sector number or nb_sectors
1346 -EACCES Trying to write a read-only device
1347*/
ths5fafdf22007-09-16 21:08:06 +00001348int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001349 const uint8_t *buf, int nb_sectors)
1350{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001351 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001352}
1353
aliguorieda578e2009-03-12 19:57:16 +00001354int bdrv_pread(BlockDriverState *bs, int64_t offset,
1355 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001356{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001357 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001358 int len, nb_sectors, count;
1359 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001360 int ret;
bellard83f64092006-08-01 16:21:11 +00001361
1362 count = count1;
1363 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001364 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001365 if (len > count)
1366 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001367 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001368 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001369 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1370 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001371 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001372 count -= len;
1373 if (count == 0)
1374 return count1;
1375 sector_num++;
1376 buf += len;
1377 }
1378
1379 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001380 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001381 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001382 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1383 return ret;
bellard83f64092006-08-01 16:21:11 +00001384 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001385 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001386 buf += len;
1387 count -= len;
1388 }
1389
1390 /* add data from the last sector */
1391 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001392 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1393 return ret;
bellard83f64092006-08-01 16:21:11 +00001394 memcpy(buf, tmp_buf, count);
1395 }
1396 return count1;
1397}
1398
aliguorieda578e2009-03-12 19:57:16 +00001399int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1400 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001401{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001402 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001403 int len, nb_sectors, count;
1404 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001405 int ret;
bellard83f64092006-08-01 16:21:11 +00001406
1407 count = count1;
1408 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001409 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001410 if (len > count)
1411 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001412 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001413 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001414 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1415 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001416 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001417 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1418 return ret;
bellard83f64092006-08-01 16:21:11 +00001419 count -= len;
1420 if (count == 0)
1421 return count1;
1422 sector_num++;
1423 buf += len;
1424 }
1425
1426 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001427 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001428 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001429 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1430 return ret;
bellard83f64092006-08-01 16:21:11 +00001431 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001432 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001433 buf += len;
1434 count -= len;
1435 }
1436
1437 /* add data from the last sector */
1438 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001439 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1440 return ret;
bellard83f64092006-08-01 16:21:11 +00001441 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001442 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1443 return ret;
bellard83f64092006-08-01 16:21:11 +00001444 }
1445 return count1;
1446}
bellard83f64092006-08-01 16:21:11 +00001447
Kevin Wolff08145f2010-06-16 16:38:15 +02001448/*
1449 * Writes to the file and ensures that no writes are reordered across this
1450 * request (acts as a barrier)
1451 *
1452 * Returns 0 on success, -errno in error cases.
1453 */
1454int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1455 const void *buf, int count)
1456{
1457 int ret;
1458
1459 ret = bdrv_pwrite(bs, offset, buf, count);
1460 if (ret < 0) {
1461 return ret;
1462 }
1463
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001464 /* No flush needed for cache modes that use O_DSYNC */
1465 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001466 bdrv_flush(bs);
1467 }
1468
1469 return 0;
1470}
1471
Stefan Hajnocziab185922011-11-17 13:40:31 +00001472static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1473 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1474{
1475 /* Perform I/O through a temporary buffer so that users who scribble over
1476 * their read buffer while the operation is in progress do not end up
1477 * modifying the image file. This is critical for zero-copy guest I/O
1478 * where anything might happen inside guest memory.
1479 */
1480 void *bounce_buffer;
1481
1482 struct iovec iov;
1483 QEMUIOVector bounce_qiov;
1484 int64_t cluster_sector_num;
1485 int cluster_nb_sectors;
1486 size_t skip_bytes;
1487 int ret;
1488
1489 /* Cover entire cluster so no additional backing file I/O is required when
1490 * allocating cluster in the image file.
1491 */
1492 round_to_clusters(bs, sector_num, nb_sectors,
1493 &cluster_sector_num, &cluster_nb_sectors);
1494
1495 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
1496 cluster_sector_num, cluster_nb_sectors);
1497
1498 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1499 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1500 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1501
1502 ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1503 &bounce_qiov);
1504 if (ret < 0) {
1505 goto err;
1506 }
1507
1508 ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
1509 &bounce_qiov);
1510 if (ret < 0) {
1511 /* It might be okay to ignore write errors for guest requests. If this
1512 * is a deliberate copy-on-read then we don't want to ignore the error.
1513 * Simply report it in all cases.
1514 */
1515 goto err;
1516 }
1517
1518 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1519 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1520 nb_sectors * BDRV_SECTOR_SIZE);
1521
1522err:
1523 qemu_vfree(bounce_buffer);
1524 return ret;
1525}
1526
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001527/*
1528 * Handle a read request in coroutine context
1529 */
1530static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1531 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001532{
1533 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001534 BdrvTrackedRequest req;
1535 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001536
Kevin Wolfda1fa912011-07-14 17:27:13 +02001537 if (!drv) {
1538 return -ENOMEDIUM;
1539 }
1540 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1541 return -EIO;
1542 }
1543
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001544 /* throttling disk read I/O */
1545 if (bs->io_limits_enabled) {
1546 bdrv_io_limits_intercept(bs, false, nb_sectors);
1547 }
1548
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001549 if (bs->copy_on_read) {
1550 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1551 }
1552
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001553 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001554
1555 if (bs->copy_on_read) {
1556 int pnum;
1557
1558 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1559 if (ret < 0) {
1560 goto out;
1561 }
1562
1563 if (!ret || pnum != nb_sectors) {
1564 ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
1565 goto out;
1566 }
1567 }
1568
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001569 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001570
1571out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001572 tracked_request_end(&req);
1573 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001574}
1575
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001576int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001577 int nb_sectors, QEMUIOVector *qiov)
1578{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001579 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001580
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001581 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1582}
1583
1584/*
1585 * Handle a write request in coroutine context
1586 */
1587static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1588 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1589{
1590 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001591 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001592 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001593
1594 if (!bs->drv) {
1595 return -ENOMEDIUM;
1596 }
1597 if (bs->read_only) {
1598 return -EACCES;
1599 }
1600 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1601 return -EIO;
1602 }
1603
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001604 /* throttling disk write I/O */
1605 if (bs->io_limits_enabled) {
1606 bdrv_io_limits_intercept(bs, true, nb_sectors);
1607 }
1608
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001609 if (bs->copy_on_read) {
1610 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1611 }
1612
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001613 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1614
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001615 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1616
Kevin Wolfda1fa912011-07-14 17:27:13 +02001617 if (bs->dirty_bitmap) {
1618 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1619 }
1620
1621 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1622 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1623 }
1624
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001625 tracked_request_end(&req);
1626
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001627 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001628}
1629
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001630int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1631 int nb_sectors, QEMUIOVector *qiov)
1632{
1633 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1634
1635 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1636}
1637
bellard83f64092006-08-01 16:21:11 +00001638/**
bellard83f64092006-08-01 16:21:11 +00001639 * Truncate file to 'offset' bytes (needed only for file protocols)
1640 */
1641int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1642{
1643 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001644 int ret;
bellard83f64092006-08-01 16:21:11 +00001645 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001646 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001647 if (!drv->bdrv_truncate)
1648 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001649 if (bs->read_only)
1650 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001651 if (bdrv_in_use(bs))
1652 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001653 ret = drv->bdrv_truncate(bs, offset);
1654 if (ret == 0) {
1655 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001656 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001657 }
1658 return ret;
bellard83f64092006-08-01 16:21:11 +00001659}
1660
1661/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001662 * Length of a allocated file in bytes. Sparse files are counted by actual
1663 * allocated space. Return < 0 if error or unknown.
1664 */
1665int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1666{
1667 BlockDriver *drv = bs->drv;
1668 if (!drv) {
1669 return -ENOMEDIUM;
1670 }
1671 if (drv->bdrv_get_allocated_file_size) {
1672 return drv->bdrv_get_allocated_file_size(bs);
1673 }
1674 if (bs->file) {
1675 return bdrv_get_allocated_file_size(bs->file);
1676 }
1677 return -ENOTSUP;
1678}
1679
1680/**
bellard83f64092006-08-01 16:21:11 +00001681 * Length of a file in bytes. Return < 0 if error or unknown.
1682 */
1683int64_t bdrv_getlength(BlockDriverState *bs)
1684{
1685 BlockDriver *drv = bs->drv;
1686 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001687 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001688
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001689 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001690 if (drv->bdrv_getlength) {
1691 return drv->bdrv_getlength(bs);
1692 }
bellard83f64092006-08-01 16:21:11 +00001693 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001694 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001695}
1696
bellard19cb3732006-08-19 11:45:59 +00001697/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001698void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001699{
bellard19cb3732006-08-19 11:45:59 +00001700 int64_t length;
1701 length = bdrv_getlength(bs);
1702 if (length < 0)
1703 length = 0;
1704 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001705 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001706 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001707}
bellardcf989512004-02-16 21:56:36 +00001708
/* One MSDOS partition table entry (packed, so there is no padding) */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* what partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001721
1722/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1723static int guess_disk_lchs(BlockDriverState *bs,
1724 int *pcylinders, int *pheads, int *psectors)
1725{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001726 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001727 int ret, i, heads, sectors, cylinders;
1728 struct partition *p;
1729 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001730 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001731
1732 bdrv_get_geometry(bs, &nb_sectors);
1733
1734 ret = bdrv_read(bs, 0, buf, 1);
1735 if (ret < 0)
1736 return -1;
1737 /* test msdos magic */
1738 if (buf[510] != 0x55 || buf[511] != 0xaa)
1739 return -1;
1740 for(i = 0; i < 4; i++) {
1741 p = ((struct partition *)(buf + 0x1be)) + i;
1742 nr_sects = le32_to_cpu(p->nr_sects);
1743 if (nr_sects && p->end_head) {
1744 /* We make the assumption that the partition terminates on
1745 a cylinder boundary */
1746 heads = p->end_head + 1;
1747 sectors = p->end_sector & 63;
1748 if (sectors == 0)
1749 continue;
1750 cylinders = nb_sectors / (heads * sectors);
1751 if (cylinders < 1 || cylinders > 16383)
1752 continue;
1753 *pheads = heads;
1754 *psectors = sectors;
1755 *pcylinders = cylinders;
1756#if 0
1757 printf("guessed geometry: LCHS=%d %d %d\n",
1758 cylinders, heads, sectors);
1759#endif
1760 return 0;
1761 }
1762 }
1763 return -1;
1764}
1765
1766void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1767{
1768 int translation, lba_detected = 0;
1769 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00001770 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001771
1772 /* if a geometry hint is available, use it */
1773 bdrv_get_geometry(bs, &nb_sectors);
1774 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1775 translation = bdrv_get_translation_hint(bs);
1776 if (cylinders != 0) {
1777 *pcyls = cylinders;
1778 *pheads = heads;
1779 *psecs = secs;
1780 } else {
1781 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1782 if (heads > 16) {
1783 /* if heads > 16, it means that a BIOS LBA
1784 translation was active, so the default
1785 hardware geometry is OK */
1786 lba_detected = 1;
1787 goto default_geometry;
1788 } else {
1789 *pcyls = cylinders;
1790 *pheads = heads;
1791 *psecs = secs;
1792 /* disable any translation to be in sync with
1793 the logical geometry */
1794 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1795 bdrv_set_translation_hint(bs,
1796 BIOS_ATA_TRANSLATION_NONE);
1797 }
1798 }
1799 } else {
1800 default_geometry:
1801 /* if no geometry, use a standard physical disk geometry */
1802 cylinders = nb_sectors / (16 * 63);
1803
1804 if (cylinders > 16383)
1805 cylinders = 16383;
1806 else if (cylinders < 2)
1807 cylinders = 2;
1808 *pcyls = cylinders;
1809 *pheads = 16;
1810 *psecs = 63;
1811 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1812 if ((*pcyls * *pheads) <= 131072) {
1813 bdrv_set_translation_hint(bs,
1814 BIOS_ATA_TRANSLATION_LARGE);
1815 } else {
1816 bdrv_set_translation_hint(bs,
1817 BIOS_ATA_TRANSLATION_LBA);
1818 }
1819 }
1820 }
1821 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1822 }
1823}
1824
ths5fafdf22007-09-16 21:08:06 +00001825void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001826 int cyls, int heads, int secs)
1827{
1828 bs->cyls = cyls;
1829 bs->heads = heads;
1830 bs->secs = secs;
1831}
1832
bellard46d47672004-11-16 01:45:27 +00001833void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1834{
1835 bs->translation = translation;
1836}
1837
ths5fafdf22007-09-16 21:08:06 +00001838void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001839 int *pcyls, int *pheads, int *psecs)
1840{
1841 *pcyls = bs->cyls;
1842 *pheads = bs->heads;
1843 *psecs = bs->secs;
1844}
1845
Zhi Yong Wu0563e192011-11-03 16:57:25 +08001846/* throttling disk io limits */
1847void bdrv_set_io_limits(BlockDriverState *bs,
1848 BlockIOLimit *io_limits)
1849{
1850 bs->io_limits = *io_limits;
1851 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1852}
1853
Blue Swirl5bbdbb42011-02-12 20:43:32 +00001854/* Recognize floppy formats */
1855typedef struct FDFormat {
1856 FDriveType drive;
1857 uint8_t last_sect;
1858 uint8_t max_track;
1859 uint8_t max_head;
1860} FDFormat;
1861
1862static const FDFormat fd_formats[] = {
1863 /* First entry is default format */
1864 /* 1.44 MB 3"1/2 floppy disks */
1865 { FDRIVE_DRV_144, 18, 80, 1, },
1866 { FDRIVE_DRV_144, 20, 80, 1, },
1867 { FDRIVE_DRV_144, 21, 80, 1, },
1868 { FDRIVE_DRV_144, 21, 82, 1, },
1869 { FDRIVE_DRV_144, 21, 83, 1, },
1870 { FDRIVE_DRV_144, 22, 80, 1, },
1871 { FDRIVE_DRV_144, 23, 80, 1, },
1872 { FDRIVE_DRV_144, 24, 80, 1, },
1873 /* 2.88 MB 3"1/2 floppy disks */
1874 { FDRIVE_DRV_288, 36, 80, 1, },
1875 { FDRIVE_DRV_288, 39, 80, 1, },
1876 { FDRIVE_DRV_288, 40, 80, 1, },
1877 { FDRIVE_DRV_288, 44, 80, 1, },
1878 { FDRIVE_DRV_288, 48, 80, 1, },
1879 /* 720 kB 3"1/2 floppy disks */
1880 { FDRIVE_DRV_144, 9, 80, 1, },
1881 { FDRIVE_DRV_144, 10, 80, 1, },
1882 { FDRIVE_DRV_144, 10, 82, 1, },
1883 { FDRIVE_DRV_144, 10, 83, 1, },
1884 { FDRIVE_DRV_144, 13, 80, 1, },
1885 { FDRIVE_DRV_144, 14, 80, 1, },
1886 /* 1.2 MB 5"1/4 floppy disks */
1887 { FDRIVE_DRV_120, 15, 80, 1, },
1888 { FDRIVE_DRV_120, 18, 80, 1, },
1889 { FDRIVE_DRV_120, 18, 82, 1, },
1890 { FDRIVE_DRV_120, 18, 83, 1, },
1891 { FDRIVE_DRV_120, 20, 80, 1, },
1892 /* 720 kB 5"1/4 floppy disks */
1893 { FDRIVE_DRV_120, 9, 80, 1, },
1894 { FDRIVE_DRV_120, 11, 80, 1, },
1895 /* 360 kB 5"1/4 floppy disks */
1896 { FDRIVE_DRV_120, 9, 40, 1, },
1897 { FDRIVE_DRV_120, 9, 40, 0, },
1898 { FDRIVE_DRV_120, 10, 41, 1, },
1899 { FDRIVE_DRV_120, 10, 42, 1, },
1900 /* 320 kB 5"1/4 floppy disks */
1901 { FDRIVE_DRV_120, 8, 40, 1, },
1902 { FDRIVE_DRV_120, 8, 40, 0, },
1903 /* 360 kB must match 5"1/4 better than 3"1/2... */
1904 { FDRIVE_DRV_144, 9, 80, 0, },
1905 /* end */
1906 { FDRIVE_DRV_NONE, -1, -1, 0, },
1907};
1908
1909void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1910 int *max_track, int *last_sect,
1911 FDriveType drive_in, FDriveType *drive)
1912{
1913 const FDFormat *parse;
1914 uint64_t nb_sectors, size;
1915 int i, first_match, match;
1916
1917 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1918 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1919 /* User defined disk */
1920 } else {
1921 bdrv_get_geometry(bs, &nb_sectors);
1922 match = -1;
1923 first_match = -1;
1924 for (i = 0; ; i++) {
1925 parse = &fd_formats[i];
1926 if (parse->drive == FDRIVE_DRV_NONE) {
1927 break;
1928 }
1929 if (drive_in == parse->drive ||
1930 drive_in == FDRIVE_DRV_NONE) {
1931 size = (parse->max_head + 1) * parse->max_track *
1932 parse->last_sect;
1933 if (nb_sectors == size) {
1934 match = i;
1935 break;
1936 }
1937 if (first_match == -1) {
1938 first_match = i;
1939 }
1940 }
1941 }
1942 if (match == -1) {
1943 if (first_match == -1) {
1944 match = 1;
1945 } else {
1946 match = first_match;
1947 }
1948 parse = &fd_formats[match];
1949 }
1950 *nb_heads = parse->max_head + 1;
1951 *max_track = parse->max_track;
1952 *last_sect = parse->last_sect;
1953 *drive = parse->drive;
1954 }
1955}
1956
bellard46d47672004-11-16 01:45:27 +00001957int bdrv_get_translation_hint(BlockDriverState *bs)
1958{
1959 return bs->translation;
1960}
1961
Markus Armbrusterabd7f682010-06-02 18:55:17 +02001962void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1963 BlockErrorAction on_write_error)
1964{
1965 bs->on_read_error = on_read_error;
1966 bs->on_write_error = on_write_error;
1967}
1968
1969BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1970{
1971 return is_read ? bs->on_read_error : bs->on_write_error;
1972}
1973
bellardb3380822004-03-14 21:38:54 +00001974int bdrv_is_read_only(BlockDriverState *bs)
1975{
1976 return bs->read_only;
1977}
1978
ths985a03b2007-12-24 16:10:43 +00001979int bdrv_is_sg(BlockDriverState *bs)
1980{
1981 return bs->sg;
1982}
1983
Christoph Hellwige900a7b2009-09-04 19:01:15 +02001984int bdrv_enable_write_cache(BlockDriverState *bs)
1985{
1986 return bs->enable_write_cache;
1987}
1988
bellardea2384d2004-08-01 21:59:26 +00001989int bdrv_is_encrypted(BlockDriverState *bs)
1990{
1991 if (bs->backing_hd && bs->backing_hd->encrypted)
1992 return 1;
1993 return bs->encrypted;
1994}
1995
aliguoric0f4ce72009-03-05 23:01:01 +00001996int bdrv_key_required(BlockDriverState *bs)
1997{
1998 BlockDriverState *backing_hd = bs->backing_hd;
1999
2000 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2001 return 1;
2002 return (bs->encrypted && !bs->valid_key);
2003}
2004
bellardea2384d2004-08-01 21:59:26 +00002005int bdrv_set_key(BlockDriverState *bs, const char *key)
2006{
2007 int ret;
2008 if (bs->backing_hd && bs->backing_hd->encrypted) {
2009 ret = bdrv_set_key(bs->backing_hd, key);
2010 if (ret < 0)
2011 return ret;
2012 if (!bs->encrypted)
2013 return 0;
2014 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002015 if (!bs->encrypted) {
2016 return -EINVAL;
2017 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2018 return -ENOMEDIUM;
2019 }
aliguoric0f4ce72009-03-05 23:01:01 +00002020 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002021 if (ret < 0) {
2022 bs->valid_key = 0;
2023 } else if (!bs->valid_key) {
2024 bs->valid_key = 1;
2025 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002026 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002027 }
aliguoric0f4ce72009-03-05 23:01:01 +00002028 return ret;
bellardea2384d2004-08-01 21:59:26 +00002029}
2030
2031void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2032{
bellard19cb3732006-08-19 11:45:59 +00002033 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002034 buf[0] = '\0';
2035 } else {
2036 pstrcpy(buf, buf_size, bs->drv->format_name);
2037 }
2038}
2039
ths5fafdf22007-09-16 21:08:06 +00002040void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002041 void *opaque)
2042{
2043 BlockDriver *drv;
2044
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002045 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002046 it(opaque, drv->format_name);
2047 }
2048}
2049
bellardb3380822004-03-14 21:38:54 +00002050BlockDriverState *bdrv_find(const char *name)
2051{
2052 BlockDriverState *bs;
2053
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002054 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2055 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002056 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002057 }
bellardb3380822004-03-14 21:38:54 +00002058 }
2059 return NULL;
2060}
2061
Markus Armbruster2f399b02010-06-02 18:55:20 +02002062BlockDriverState *bdrv_next(BlockDriverState *bs)
2063{
2064 if (!bs) {
2065 return QTAILQ_FIRST(&bdrv_states);
2066 }
2067 return QTAILQ_NEXT(bs, list);
2068}
2069
aliguori51de9762009-03-05 23:00:43 +00002070void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002071{
2072 BlockDriverState *bs;
2073
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002074 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002075 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002076 }
2077}
2078
bellardea2384d2004-08-01 21:59:26 +00002079const char *bdrv_get_device_name(BlockDriverState *bs)
2080{
2081 return bs->device_name;
2082}
2083
aliguoric6ca28d2008-10-06 13:55:43 +00002084void bdrv_flush_all(void)
2085{
2086 BlockDriverState *bs;
2087
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002088 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Markus Armbrusterc602a482011-08-03 15:08:10 +02002089 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
aliguoric6ca28d2008-10-06 13:55:43 +00002090 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002091 }
2092 }
aliguoric6ca28d2008-10-06 13:55:43 +00002093}
2094
Kevin Wolff2feebb2010-04-14 17:30:35 +02002095int bdrv_has_zero_init(BlockDriverState *bs)
2096{
2097 assert(bs->drv);
2098
Kevin Wolf336c1c12010-07-28 11:26:29 +02002099 if (bs->drv->bdrv_has_zero_init) {
2100 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002101 }
2102
2103 return 1;
2104}
2105
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002106typedef struct BdrvCoIsAllocatedData {
2107 BlockDriverState *bs;
2108 int64_t sector_num;
2109 int nb_sectors;
2110 int *pnum;
2111 int ret;
2112 bool done;
2113} BdrvCoIsAllocatedData;
2114
thsf58c7b32008-06-05 21:53:49 +00002115/*
2116 * Returns true iff the specified sector is present in the disk image. Drivers
2117 * not implementing the functionality are assumed to not support backing files,
2118 * hence all their sectors are reported as allocated.
2119 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002120 * If 'sector_num' is beyond the end of the disk image the return value is 0
2121 * and 'pnum' is set to 0.
2122 *
thsf58c7b32008-06-05 21:53:49 +00002123 * 'pnum' is set to the number of sectors (including and immediately following
2124 * the specified sector) that are known to be in the same
2125 * allocated/unallocated state.
2126 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002127 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2128 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002129 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002130int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2131 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002132{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002133 int64_t n;
2134
2135 if (sector_num >= bs->total_sectors) {
2136 *pnum = 0;
2137 return 0;
2138 }
2139
2140 n = bs->total_sectors - sector_num;
2141 if (n < nb_sectors) {
2142 nb_sectors = n;
2143 }
2144
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002145 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002146 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002147 return 1;
2148 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002149
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002150 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2151}
2152
2153/* Coroutine wrapper for bdrv_is_allocated() */
2154static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2155{
2156 BdrvCoIsAllocatedData *data = opaque;
2157 BlockDriverState *bs = data->bs;
2158
2159 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2160 data->pnum);
2161 data->done = true;
2162}
2163
2164/*
2165 * Synchronous wrapper around bdrv_co_is_allocated().
2166 *
2167 * See bdrv_co_is_allocated() for details.
2168 */
2169int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2170 int *pnum)
2171{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002172 Coroutine *co;
2173 BdrvCoIsAllocatedData data = {
2174 .bs = bs,
2175 .sector_num = sector_num,
2176 .nb_sectors = nb_sectors,
2177 .pnum = pnum,
2178 .done = false,
2179 };
2180
2181 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2182 qemu_coroutine_enter(co, &data);
2183 while (!data.done) {
2184 qemu_aio_wait();
2185 }
2186 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002187}
2188
Luiz Capitulino2582bfe2010-02-03 12:41:01 -02002189void bdrv_mon_event(const BlockDriverState *bdrv,
2190 BlockMonEventAction action, int is_read)
2191{
2192 QObject *data;
2193 const char *action_str;
2194
2195 switch (action) {
2196 case BDRV_ACTION_REPORT:
2197 action_str = "report";
2198 break;
2199 case BDRV_ACTION_IGNORE:
2200 action_str = "ignore";
2201 break;
2202 case BDRV_ACTION_STOP:
2203 action_str = "stop";
2204 break;
2205 default:
2206 abort();
2207 }
2208
2209 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2210 bdrv->device_name,
2211 action_str,
2212 is_read ? "read" : "write");
2213 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2214
2215 qobject_decref(data);
2216}
2217
Luiz Capitulinob2023812011-09-21 17:16:47 -03002218BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002219{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002220 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002221 BlockDriverState *bs;
2222
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002223 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002224 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002225
Luiz Capitulinob2023812011-09-21 17:16:47 -03002226 info->value = g_malloc0(sizeof(*info->value));
2227 info->value->device = g_strdup(bs->device_name);
2228 info->value->type = g_strdup("unknown");
2229 info->value->locked = bdrv_dev_is_medium_locked(bs);
2230 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002231
Markus Armbrustere4def802011-09-06 18:58:53 +02002232 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002233 info->value->has_tray_open = true;
2234 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002235 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002236
2237 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002238 info->value->has_io_status = true;
2239 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002240 }
2241
bellard19cb3732006-08-19 11:45:59 +00002242 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002243 info->value->has_inserted = true;
2244 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2245 info->value->inserted->file = g_strdup(bs->filename);
2246 info->value->inserted->ro = bs->read_only;
2247 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2248 info->value->inserted->encrypted = bs->encrypted;
2249 if (bs->backing_file[0]) {
2250 info->value->inserted->has_backing_file = true;
2251 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002252 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002253
2254 if (bs->io_limits_enabled) {
2255 info->value->inserted->bps =
2256 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2257 info->value->inserted->bps_rd =
2258 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2259 info->value->inserted->bps_wr =
2260 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2261 info->value->inserted->iops =
2262 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2263 info->value->inserted->iops_rd =
2264 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2265 info->value->inserted->iops_wr =
2266 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2267 }
bellardb3380822004-03-14 21:38:54 +00002268 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002269
2270 /* XXX: waiting for the qapi to support GSList */
2271 if (!cur_item) {
2272 head = cur_item = info;
2273 } else {
2274 cur_item->next = info;
2275 cur_item = info;
2276 }
bellardb3380822004-03-14 21:38:54 +00002277 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002278
Luiz Capitulinob2023812011-09-21 17:16:47 -03002279 return head;
bellardb3380822004-03-14 21:38:54 +00002280}
thsa36e69d2007-12-02 05:18:19 +00002281
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002282/* Consider exposing this as a full fledged QMP command */
2283static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002284{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002285 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002286
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002287 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002288
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002289 if (bs->device_name[0]) {
2290 s->has_device = true;
2291 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002292 }
2293
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002294 s->stats = g_malloc0(sizeof(*s->stats));
2295 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2296 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2297 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2298 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2299 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2300 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2301 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2302 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2303 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2304
Kevin Wolf294cc352010-04-28 14:34:01 +02002305 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002306 s->has_parent = true;
2307 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002308 }
2309
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002310 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002311}
2312
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002313BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002314{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002315 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002316 BlockDriverState *bs;
2317
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002318 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002319 BlockStatsList *info = g_malloc0(sizeof(*info));
2320 info->value = qmp_query_blockstat(bs, NULL);
2321
2322 /* XXX: waiting for the qapi to support GSList */
2323 if (!cur_item) {
2324 head = cur_item = info;
2325 } else {
2326 cur_item->next = info;
2327 cur_item = info;
2328 }
thsa36e69d2007-12-02 05:18:19 +00002329 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002330
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002331 return head;
thsa36e69d2007-12-02 05:18:19 +00002332}
bellardea2384d2004-08-01 21:59:26 +00002333
aliguori045df332009-03-05 23:00:48 +00002334const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2335{
2336 if (bs->backing_hd && bs->backing_hd->encrypted)
2337 return bs->backing_file;
2338 else if (bs->encrypted)
2339 return bs->filename;
2340 else
2341 return NULL;
2342}
2343
ths5fafdf22007-09-16 21:08:06 +00002344void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002345 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002346{
Kevin Wolf3574c602011-10-26 11:02:11 +02002347 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002348}
2349
ths5fafdf22007-09-16 21:08:06 +00002350int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002351 const uint8_t *buf, int nb_sectors)
2352{
2353 BlockDriver *drv = bs->drv;
2354 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002355 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002356 if (!drv->bdrv_write_compressed)
2357 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002358 if (bdrv_check_request(bs, sector_num, nb_sectors))
2359 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002360
Jan Kiszkac6d22832009-11-30 18:21:20 +01002361 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002362 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2363 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002364
bellardfaea38e2006-08-05 21:31:00 +00002365 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2366}
ths3b46e622007-09-17 08:09:54 +00002367
bellardfaea38e2006-08-05 21:31:00 +00002368int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2369{
2370 BlockDriver *drv = bs->drv;
2371 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002372 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002373 if (!drv->bdrv_get_info)
2374 return -ENOTSUP;
2375 memset(bdi, 0, sizeof(*bdi));
2376 return drv->bdrv_get_info(bs, bdi);
2377}
2378
Christoph Hellwig45566e92009-07-10 23:11:57 +02002379int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2380 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002381{
2382 BlockDriver *drv = bs->drv;
2383 if (!drv)
2384 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002385 if (drv->bdrv_save_vmstate)
2386 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2387 if (bs->file)
2388 return bdrv_save_vmstate(bs->file, buf, pos, size);
2389 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002390}
2391
Christoph Hellwig45566e92009-07-10 23:11:57 +02002392int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2393 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002394{
2395 BlockDriver *drv = bs->drv;
2396 if (!drv)
2397 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002398 if (drv->bdrv_load_vmstate)
2399 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2400 if (bs->file)
2401 return bdrv_load_vmstate(bs->file, buf, pos, size);
2402 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002403}
2404
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002405void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2406{
2407 BlockDriver *drv = bs->drv;
2408
2409 if (!drv || !drv->bdrv_debug_event) {
2410 return;
2411 }
2412
2413 return drv->bdrv_debug_event(bs, event);
2414
2415}
2416
bellardfaea38e2006-08-05 21:31:00 +00002417/**************************************************************/
2418/* handling of snapshots */
2419
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002420int bdrv_can_snapshot(BlockDriverState *bs)
2421{
2422 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002423 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002424 return 0;
2425 }
2426
2427 if (!drv->bdrv_snapshot_create) {
2428 if (bs->file != NULL) {
2429 return bdrv_can_snapshot(bs->file);
2430 }
2431 return 0;
2432 }
2433
2434 return 1;
2435}
2436
Blue Swirl199630b2010-07-25 20:49:34 +00002437int bdrv_is_snapshot(BlockDriverState *bs)
2438{
2439 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2440}
2441
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002442BlockDriverState *bdrv_snapshots(void)
2443{
2444 BlockDriverState *bs;
2445
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002446 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002447 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002448 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002449
2450 bs = NULL;
2451 while ((bs = bdrv_next(bs))) {
2452 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002453 bs_snapshots = bs;
2454 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002455 }
2456 }
2457 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002458}
2459
ths5fafdf22007-09-16 21:08:06 +00002460int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002461 QEMUSnapshotInfo *sn_info)
2462{
2463 BlockDriver *drv = bs->drv;
2464 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002465 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002466 if (drv->bdrv_snapshot_create)
2467 return drv->bdrv_snapshot_create(bs, sn_info);
2468 if (bs->file)
2469 return bdrv_snapshot_create(bs->file, sn_info);
2470 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002471}
2472
ths5fafdf22007-09-16 21:08:06 +00002473int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002474 const char *snapshot_id)
2475{
2476 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002477 int ret, open_ret;
2478
bellardfaea38e2006-08-05 21:31:00 +00002479 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002480 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002481 if (drv->bdrv_snapshot_goto)
2482 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2483
2484 if (bs->file) {
2485 drv->bdrv_close(bs);
2486 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2487 open_ret = drv->bdrv_open(bs, bs->open_flags);
2488 if (open_ret < 0) {
2489 bdrv_delete(bs->file);
2490 bs->drv = NULL;
2491 return open_ret;
2492 }
2493 return ret;
2494 }
2495
2496 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002497}
2498
2499int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2500{
2501 BlockDriver *drv = bs->drv;
2502 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002503 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002504 if (drv->bdrv_snapshot_delete)
2505 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2506 if (bs->file)
2507 return bdrv_snapshot_delete(bs->file, snapshot_id);
2508 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002509}
2510
ths5fafdf22007-09-16 21:08:06 +00002511int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002512 QEMUSnapshotInfo **psn_info)
2513{
2514 BlockDriver *drv = bs->drv;
2515 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002516 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002517 if (drv->bdrv_snapshot_list)
2518 return drv->bdrv_snapshot_list(bs, psn_info);
2519 if (bs->file)
2520 return bdrv_snapshot_list(bs->file, psn_info);
2521 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002522}
2523
edison51ef6722010-09-21 19:58:41 -07002524int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2525 const char *snapshot_name)
2526{
2527 BlockDriver *drv = bs->drv;
2528 if (!drv) {
2529 return -ENOMEDIUM;
2530 }
2531 if (!bs->read_only) {
2532 return -EINVAL;
2533 }
2534 if (drv->bdrv_snapshot_load_tmp) {
2535 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2536 }
2537 return -ENOTSUP;
2538}
2539
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes, via snprintf) in a
 * short human-readable form and return @buf.
 *
 * Values up to 999 (including negative values) are printed as plain
 * integers.  Larger values are scaled by powers of 1024 and tagged with
 * K, M, G or T: one decimal place while the scaled value is below 10,
 * otherwise a rounded integer.  Values too large for the other units are
 * always expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++, base *= 1024) {
        if (size < (10 * base)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            break;
        }
        if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /*
             * Round to nearest using quotient and remainder.  This gives
             * the same result as (size + base / 2) / base but cannot
             * overflow when size is close to INT64_MAX.
             */
            int64_t rounded = size / base + (size % base >= (base >> 1));

            snprintf(buf, buf_size, "%" PRId64 "%c",
                     rounded,
                     suffixes[i]);
            break;
        }
    }
    return buf;
}
2569
2570char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2571{
2572 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002573#ifdef _WIN32
2574 struct tm *ptm;
2575#else
bellardfaea38e2006-08-05 21:31:00 +00002576 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002577#endif
bellardfaea38e2006-08-05 21:31:00 +00002578 time_t ti;
2579 int64_t secs;
2580
2581 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002582 snprintf(buf, buf_size,
2583 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002584 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2585 } else {
2586 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002587#ifdef _WIN32
2588 ptm = localtime(&ti);
2589 strftime(date_buf, sizeof(date_buf),
2590 "%Y-%m-%d %H:%M:%S", ptm);
2591#else
bellardfaea38e2006-08-05 21:31:00 +00002592 localtime_r(&ti, &tm);
2593 strftime(date_buf, sizeof(date_buf),
2594 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002595#endif
bellardfaea38e2006-08-05 21:31:00 +00002596 secs = sn->vm_clock_nsec / 1000000000;
2597 snprintf(clock_buf, sizeof(clock_buf),
2598 "%02d:%02d:%02d.%03d",
2599 (int)(secs / 3600),
2600 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002601 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002602 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2603 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002604 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002605 sn->id_str, sn->name,
2606 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2607 date_buf,
2608 clock_buf);
2609 }
2610 return buf;
2611}
2612
bellard83f64092006-08-01 16:21:11 +00002613/**************************************************************/
2614/* async I/Os */
2615
aliguori3b69e4b2009-01-22 16:59:24 +00002616BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002617 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002618 BlockDriverCompletionFunc *cb, void *opaque)
2619{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002620 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2621
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002622 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002623 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002624}
2625
aliguorif141eaf2009-04-07 18:43:24 +00002626BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2627 QEMUIOVector *qiov, int nb_sectors,
2628 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002629{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002630 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2631
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002632 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002633 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002634}
2635
Kevin Wolf40b4f532009-09-09 17:53:37 +02002636
2637typedef struct MultiwriteCB {
2638 int error;
2639 int num_requests;
2640 int num_callbacks;
2641 struct {
2642 BlockDriverCompletionFunc *cb;
2643 void *opaque;
2644 QEMUIOVector *free_qiov;
2645 void *free_buf;
2646 } callbacks[];
2647} MultiwriteCB;
2648
2649static void multiwrite_user_cb(MultiwriteCB *mcb)
2650{
2651 int i;
2652
2653 for (i = 0; i < mcb->num_callbacks; i++) {
2654 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002655 if (mcb->callbacks[i].free_qiov) {
2656 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2657 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002658 g_free(mcb->callbacks[i].free_qiov);
Herve Poussineauf8a83242010-01-24 21:23:56 +00002659 qemu_vfree(mcb->callbacks[i].free_buf);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002660 }
2661}
2662
2663static void multiwrite_cb(void *opaque, int ret)
2664{
2665 MultiwriteCB *mcb = opaque;
2666
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002667 trace_multiwrite_cb(mcb, ret);
2668
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002669 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002670 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002671 }
2672
2673 mcb->num_requests--;
2674 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002675 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002676 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002677 }
2678}
2679
2680static int multiwrite_req_compare(const void *a, const void *b)
2681{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002682 const BlockRequest *req1 = a, *req2 = b;
2683
2684 /*
2685 * Note that we can't simply subtract req2->sector from req1->sector
2686 * here as that could overflow the return value.
2687 */
2688 if (req1->sector > req2->sector) {
2689 return 1;
2690 } else if (req1->sector < req2->sector) {
2691 return -1;
2692 } else {
2693 return 0;
2694 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002695}
2696
2697/*
2698 * Takes a bunch of requests and tries to merge them. Returns the number of
2699 * requests that remain after merging.
2700 */
2701static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2702 int num_reqs, MultiwriteCB *mcb)
2703{
2704 int i, outidx;
2705
2706 // Sort requests by start sector
2707 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2708
2709 // Check if adjacent requests touch the same clusters. If so, combine them,
2710 // filling up gaps with zero sectors.
2711 outidx = 0;
2712 for (i = 1; i < num_reqs; i++) {
2713 int merge = 0;
2714 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2715
2716 // This handles the cases that are valid for all block drivers, namely
2717 // exactly sequential writes and overlapping writes.
2718 if (reqs[i].sector <= oldreq_last) {
2719 merge = 1;
2720 }
2721
2722 // The block driver may decide that it makes sense to combine requests
2723 // even if there is a gap of some sectors between them. In this case,
2724 // the gap is filled with zeros (therefore only applicable for yet
2725 // unused space in format like qcow2).
2726 if (!merge && bs->drv->bdrv_merge_requests) {
2727 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2728 }
2729
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002730 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2731 merge = 0;
2732 }
2733
Kevin Wolf40b4f532009-09-09 17:53:37 +02002734 if (merge) {
2735 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002736 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002737 qemu_iovec_init(qiov,
2738 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2739
2740 // Add the first request to the merged one. If the requests are
2741 // overlapping, drop the last sectors of the first request.
2742 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2743 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2744
2745 // We might need to add some zeros between the two requests
2746 if (reqs[i].sector > oldreq_last) {
2747 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2748 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2749 memset(buf, 0, zero_bytes);
2750 qemu_iovec_add(qiov, buf, zero_bytes);
2751 mcb->callbacks[i].free_buf = buf;
2752 }
2753
2754 // Add the second request
2755 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2756
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002757 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002758 reqs[outidx].qiov = qiov;
2759
2760 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2761 } else {
2762 outidx++;
2763 reqs[outidx].sector = reqs[i].sector;
2764 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2765 reqs[outidx].qiov = reqs[i].qiov;
2766 }
2767 }
2768
2769 return outidx + 1;
2770}
2771
2772/*
2773 * Submit multiple AIO write requests at once.
2774 *
2775 * On success, the function returns 0 and all requests in the reqs array have
2776 * been submitted. In error case this function returns -1, and any of the
2777 * requests may or may not be submitted yet. In particular, this means that the
2778 * callback will be called for some of the requests, for others it won't. The
2779 * caller must check the error field of the BlockRequest to wait for the right
2780 * callbacks (if error != 0, no callback will be called).
2781 *
2782 * The implementation may modify the contents of the reqs array, e.g. to merge
2783 * requests. However, the fields opaque and error are left unmodified as they
2784 * are used to signal failure for a single request to the caller.
2785 */
2786int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2787{
2788 BlockDriverAIOCB *acb;
2789 MultiwriteCB *mcb;
2790 int i;
2791
Ryan Harper301db7c2011-03-07 10:01:04 -06002792 /* don't submit writes if we don't have a medium */
2793 if (bs->drv == NULL) {
2794 for (i = 0; i < num_reqs; i++) {
2795 reqs[i].error = -ENOMEDIUM;
2796 }
2797 return -1;
2798 }
2799
Kevin Wolf40b4f532009-09-09 17:53:37 +02002800 if (num_reqs == 0) {
2801 return 0;
2802 }
2803
2804 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05002805 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002806 mcb->num_requests = 0;
2807 mcb->num_callbacks = num_reqs;
2808
2809 for (i = 0; i < num_reqs; i++) {
2810 mcb->callbacks[i].cb = reqs[i].cb;
2811 mcb->callbacks[i].opaque = reqs[i].opaque;
2812 }
2813
2814 // Check for mergable requests
2815 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2816
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002817 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2818
Kevin Wolf453f9a12010-07-02 14:01:21 +02002819 /*
2820 * Run the aio requests. As soon as one request can't be submitted
2821 * successfully, fail all requests that are not yet submitted (we must
2822 * return failure for all requests anyway)
2823 *
2824 * num_requests cannot be set to the right value immediately: If
2825 * bdrv_aio_writev fails for some request, num_requests would be too high
2826 * and therefore multiwrite_cb() would never recognize the multiwrite
2827 * request as completed. We also cannot use the loop variable i to set it
2828 * when the first request fails because the callback may already have been
2829 * called for previously submitted requests. Thus, num_requests must be
2830 * incremented for each request that is submitted.
2831 *
2832 * The problem that callbacks may be called early also means that we need
2833 * to take care that num_requests doesn't become 0 before all requests are
2834 * submitted - multiwrite_cb() would consider the multiwrite request
2835 * completed. A dummy request that is "completed" by a manual call to
2836 * multiwrite_cb() takes care of this.
2837 */
2838 mcb->num_requests = 1;
2839
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002840 // Run the aio requests
Kevin Wolf40b4f532009-09-09 17:53:37 +02002841 for (i = 0; i < num_reqs; i++) {
Kevin Wolf453f9a12010-07-02 14:01:21 +02002842 mcb->num_requests++;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002843 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2844 reqs[i].nb_sectors, multiwrite_cb, mcb);
2845
2846 if (acb == NULL) {
2847 // We can only fail the whole thing if no request has been
2848 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2849 // complete and report the error in the callback.
Kevin Wolf453f9a12010-07-02 14:01:21 +02002850 if (i == 0) {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002851 trace_bdrv_aio_multiwrite_earlyfail(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002852 goto fail;
2853 } else {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002854 trace_bdrv_aio_multiwrite_latefail(mcb, i);
Kevin Wolf7eb58a62010-04-06 18:24:07 +02002855 multiwrite_cb(mcb, -EIO);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002856 break;
2857 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002858 }
2859 }
2860
Kevin Wolf453f9a12010-07-02 14:01:21 +02002861 /* Complete the dummy request */
2862 multiwrite_cb(mcb, 0);
2863
Kevin Wolf40b4f532009-09-09 17:53:37 +02002864 return 0;
2865
2866fail:
Kevin Wolf453f9a12010-07-02 14:01:21 +02002867 for (i = 0; i < mcb->num_callbacks; i++) {
2868 reqs[i].error = -EIO;
2869 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002870 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002871 return -1;
2872}
2873
bellard83f64092006-08-01 16:21:11 +00002874void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00002875{
aliguori6bbff9a2009-03-20 18:25:59 +00002876 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00002877}
2878
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002879/* block I/O throttling */
2880static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2881 bool is_write, double elapsed_time, uint64_t *wait)
2882{
2883 uint64_t bps_limit = 0;
2884 double bytes_limit, bytes_base, bytes_res;
2885 double slice_time, wait_time;
2886
2887 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2888 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2889 } else if (bs->io_limits.bps[is_write]) {
2890 bps_limit = bs->io_limits.bps[is_write];
2891 } else {
2892 if (wait) {
2893 *wait = 0;
2894 }
2895
2896 return false;
2897 }
2898
2899 slice_time = bs->slice_end - bs->slice_start;
2900 slice_time /= (NANOSECONDS_PER_SECOND);
2901 bytes_limit = bps_limit * slice_time;
2902 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2903 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2904 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2905 }
2906
2907 /* bytes_base: the bytes of data which have been read/written; and
2908 * it is obtained from the history statistic info.
2909 * bytes_res: the remaining bytes of data which need to be read/written.
2910 * (bytes_base + bytes_res) / bps_limit: used to calcuate
2911 * the total time for completing reading/writting all data.
2912 */
2913 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2914
2915 if (bytes_base + bytes_res <= bytes_limit) {
2916 if (wait) {
2917 *wait = 0;
2918 }
2919
2920 return false;
2921 }
2922
2923 /* Calc approx time to dispatch */
2924 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2925
2926 /* When the I/O rate at runtime exceeds the limits,
2927 * bs->slice_end need to be extended in order that the current statistic
2928 * info can be kept until the timer fire, so it is increased and tuned
2929 * based on the result of experiment.
2930 */
2931 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2932 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2933 if (wait) {
2934 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2935 }
2936
2937 return true;
2938}
2939
2940static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2941 double elapsed_time, uint64_t *wait)
2942{
2943 uint64_t iops_limit = 0;
2944 double ios_limit, ios_base;
2945 double slice_time, wait_time;
2946
2947 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2948 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2949 } else if (bs->io_limits.iops[is_write]) {
2950 iops_limit = bs->io_limits.iops[is_write];
2951 } else {
2952 if (wait) {
2953 *wait = 0;
2954 }
2955
2956 return false;
2957 }
2958
2959 slice_time = bs->slice_end - bs->slice_start;
2960 slice_time /= (NANOSECONDS_PER_SECOND);
2961 ios_limit = iops_limit * slice_time;
2962 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2963 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2964 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2965 }
2966
2967 if (ios_base + 1 <= ios_limit) {
2968 if (wait) {
2969 *wait = 0;
2970 }
2971
2972 return false;
2973 }
2974
2975 /* Calc approx time to dispatch */
2976 wait_time = (ios_base + 1) / iops_limit;
2977 if (wait_time > elapsed_time) {
2978 wait_time = wait_time - elapsed_time;
2979 } else {
2980 wait_time = 0;
2981 }
2982
2983 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2984 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2985 if (wait) {
2986 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2987 }
2988
2989 return true;
2990}
2991
2992static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
2993 bool is_write, int64_t *wait)
2994{
2995 int64_t now, max_wait;
2996 uint64_t bps_wait = 0, iops_wait = 0;
2997 double elapsed_time;
2998 int bps_ret, iops_ret;
2999
3000 now = qemu_get_clock_ns(vm_clock);
3001 if ((bs->slice_start < now)
3002 && (bs->slice_end > now)) {
3003 bs->slice_end = now + bs->slice_time;
3004 } else {
3005 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3006 bs->slice_start = now;
3007 bs->slice_end = now + bs->slice_time;
3008
3009 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3010 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3011
3012 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3013 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3014 }
3015
3016 elapsed_time = now - bs->slice_start;
3017 elapsed_time /= (NANOSECONDS_PER_SECOND);
3018
3019 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3020 is_write, elapsed_time, &bps_wait);
3021 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3022 elapsed_time, &iops_wait);
3023 if (bps_ret || iops_ret) {
3024 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3025 if (wait) {
3026 *wait = max_wait;
3027 }
3028
3029 now = qemu_get_clock_ns(vm_clock);
3030 if (bs->slice_end < now + max_wait) {
3031 bs->slice_end = now + max_wait;
3032 }
3033
3034 return true;
3035 }
3036
3037 if (wait) {
3038 *wait = 0;
3039 }
3040
3041 return false;
3042}
pbrookce1a14d2006-08-07 02:38:06 +00003043
bellard83f64092006-08-01 16:21:11 +00003044/**************************************************************/
3045/* async block device emulation */
3046
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003047typedef struct BlockDriverAIOCBSync {
3048 BlockDriverAIOCB common;
3049 QEMUBH *bh;
3050 int ret;
3051 /* vector translation state */
3052 QEMUIOVector *qiov;
3053 uint8_t *bounce;
3054 int is_write;
3055} BlockDriverAIOCBSync;
3056
3057static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3058{
Kevin Wolfb666d232010-05-05 11:44:39 +02003059 BlockDriverAIOCBSync *acb =
3060 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003061 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003062 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003063 qemu_aio_release(acb);
3064}
3065
3066static AIOPool bdrv_em_aio_pool = {
3067 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3068 .cancel = bdrv_aio_cancel_em,
3069};
3070
bellard83f64092006-08-01 16:21:11 +00003071static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003072{
pbrookce1a14d2006-08-07 02:38:06 +00003073 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003074
aliguorif141eaf2009-04-07 18:43:24 +00003075 if (!acb->is_write)
3076 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003077 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003078 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003079 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003080 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003081 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003082}
bellardbeac80c2006-06-26 20:08:57 +00003083
aliguorif141eaf2009-04-07 18:43:24 +00003084static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3085 int64_t sector_num,
3086 QEMUIOVector *qiov,
3087 int nb_sectors,
3088 BlockDriverCompletionFunc *cb,
3089 void *opaque,
3090 int is_write)
3091
bellardea2384d2004-08-01 21:59:26 +00003092{
pbrookce1a14d2006-08-07 02:38:06 +00003093 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003094
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003095 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003096 acb->is_write = is_write;
3097 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003098 acb->bounce = qemu_blockalign(bs, qiov->size);
aliguorif141eaf2009-04-07 18:43:24 +00003099
pbrookce1a14d2006-08-07 02:38:06 +00003100 if (!acb->bh)
3101 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003102
3103 if (is_write) {
3104 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003105 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003106 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003107 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003108 }
3109
pbrookce1a14d2006-08-07 02:38:06 +00003110 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003111
pbrookce1a14d2006-08-07 02:38:06 +00003112 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003113}
3114
aliguorif141eaf2009-04-07 18:43:24 +00003115static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3116 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003117 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003118{
aliguorif141eaf2009-04-07 18:43:24 +00003119 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003120}
3121
aliguorif141eaf2009-04-07 18:43:24 +00003122static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3123 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3124 BlockDriverCompletionFunc *cb, void *opaque)
3125{
3126 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3127}
3128
Kevin Wolf68485422011-06-30 10:05:46 +02003129
3130typedef struct BlockDriverAIOCBCoroutine {
3131 BlockDriverAIOCB common;
3132 BlockRequest req;
3133 bool is_write;
3134 QEMUBH* bh;
3135} BlockDriverAIOCBCoroutine;
3136
3137static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3138{
3139 qemu_aio_flush();
3140}
3141
3142static AIOPool bdrv_em_co_aio_pool = {
3143 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3144 .cancel = bdrv_aio_co_cancel_em,
3145};
3146
Paolo Bonzini35246a62011-10-14 10:41:29 +02003147static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003148{
3149 BlockDriverAIOCBCoroutine *acb = opaque;
3150
3151 acb->common.cb(acb->common.opaque, acb->req.error);
3152 qemu_bh_delete(acb->bh);
3153 qemu_aio_release(acb);
3154}
3155
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003156/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3157static void coroutine_fn bdrv_co_do_rw(void *opaque)
3158{
3159 BlockDriverAIOCBCoroutine *acb = opaque;
3160 BlockDriverState *bs = acb->common.bs;
3161
3162 if (!acb->is_write) {
3163 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
3164 acb->req.nb_sectors, acb->req.qiov);
3165 } else {
3166 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
3167 acb->req.nb_sectors, acb->req.qiov);
3168 }
3169
Paolo Bonzini35246a62011-10-14 10:41:29 +02003170 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003171 qemu_bh_schedule(acb->bh);
3172}
3173
Kevin Wolf68485422011-06-30 10:05:46 +02003174static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3175 int64_t sector_num,
3176 QEMUIOVector *qiov,
3177 int nb_sectors,
3178 BlockDriverCompletionFunc *cb,
3179 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003180 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003181{
3182 Coroutine *co;
3183 BlockDriverAIOCBCoroutine *acb;
3184
3185 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3186 acb->req.sector = sector_num;
3187 acb->req.nb_sectors = nb_sectors;
3188 acb->req.qiov = qiov;
3189 acb->is_write = is_write;
3190
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003191 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003192 qemu_coroutine_enter(co, acb);
3193
3194 return &acb->common;
3195}
3196
Paolo Bonzini07f07612011-10-17 12:32:12 +02003197static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003198{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003199 BlockDriverAIOCBCoroutine *acb = opaque;
3200 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003201
Paolo Bonzini07f07612011-10-17 12:32:12 +02003202 acb->req.error = bdrv_co_flush(bs);
3203 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003204 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003205}
3206
Paolo Bonzini07f07612011-10-17 12:32:12 +02003207BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003208 BlockDriverCompletionFunc *cb, void *opaque)
3209{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003210 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003211
Paolo Bonzini07f07612011-10-17 12:32:12 +02003212 Coroutine *co;
3213 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003214
Paolo Bonzini07f07612011-10-17 12:32:12 +02003215 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3216 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3217 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003218
Alexander Graf016f5cf2010-05-26 17:51:49 +02003219 return &acb->common;
3220}
3221
Paolo Bonzini4265d622011-10-17 12:32:14 +02003222static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3223{
3224 BlockDriverAIOCBCoroutine *acb = opaque;
3225 BlockDriverState *bs = acb->common.bs;
3226
3227 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3228 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3229 qemu_bh_schedule(acb->bh);
3230}
3231
3232BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3233 int64_t sector_num, int nb_sectors,
3234 BlockDriverCompletionFunc *cb, void *opaque)
3235{
3236 Coroutine *co;
3237 BlockDriverAIOCBCoroutine *acb;
3238
3239 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3240
3241 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3242 acb->req.sector = sector_num;
3243 acb->req.nb_sectors = nb_sectors;
3244 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3245 qemu_coroutine_enter(co, acb);
3246
3247 return &acb->common;
3248}
3249
bellardea2384d2004-08-01 21:59:26 +00003250void bdrv_init(void)
3251{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003252 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003253}
pbrookce1a14d2006-08-07 02:38:06 +00003254
Markus Armbrustereb852012009-10-27 18:41:44 +01003255void bdrv_init_with_whitelist(void)
3256{
3257 use_bdrv_whitelist = 1;
3258 bdrv_init();
3259}
3260
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003261void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3262 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003263{
pbrookce1a14d2006-08-07 02:38:06 +00003264 BlockDriverAIOCB *acb;
3265
aliguori6bbff9a2009-03-20 18:25:59 +00003266 if (pool->free_aiocb) {
3267 acb = pool->free_aiocb;
3268 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003269 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003270 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003271 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003272 }
3273 acb->bs = bs;
3274 acb->cb = cb;
3275 acb->opaque = opaque;
3276 return acb;
3277}
3278
3279void qemu_aio_release(void *p)
3280{
aliguori6bbff9a2009-03-20 18:25:59 +00003281 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3282 AIOPool *pool = acb->pool;
3283 acb->next = pool->free_aiocb;
3284 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003285}
bellard19cb3732006-08-19 11:45:59 +00003286
3287/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003288/* Coroutine block device emulation */
3289
3290typedef struct CoroutineIOCompletion {
3291 Coroutine *coroutine;
3292 int ret;
3293} CoroutineIOCompletion;
3294
3295static void bdrv_co_io_em_complete(void *opaque, int ret)
3296{
3297 CoroutineIOCompletion *co = opaque;
3298
3299 co->ret = ret;
3300 qemu_coroutine_enter(co->coroutine, NULL);
3301}
3302
3303static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3304 int nb_sectors, QEMUIOVector *iov,
3305 bool is_write)
3306{
3307 CoroutineIOCompletion co = {
3308 .coroutine = qemu_coroutine_self(),
3309 };
3310 BlockDriverAIOCB *acb;
3311
3312 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003313 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3314 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003315 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003316 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3317 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003318 }
3319
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003320 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003321 if (!acb) {
3322 return -EIO;
3323 }
3324 qemu_coroutine_yield();
3325
3326 return co.ret;
3327}
3328
3329static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3330 int64_t sector_num, int nb_sectors,
3331 QEMUIOVector *iov)
3332{
3333 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3334}
3335
3336static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3337 int64_t sector_num, int nb_sectors,
3338 QEMUIOVector *iov)
3339{
3340 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3341}
3342
Paolo Bonzini07f07612011-10-17 12:32:12 +02003343static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003344{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003345 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003346
Paolo Bonzini07f07612011-10-17 12:32:12 +02003347 rwco->ret = bdrv_co_flush(rwco->bs);
3348}
3349
3350int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3351{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003352 int ret;
3353
Kevin Wolfca716362011-11-10 18:13:59 +01003354 if (!bs->drv) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003355 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003356 }
3357
Kevin Wolfca716362011-11-10 18:13:59 +01003358 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003359 if (bs->drv->bdrv_co_flush_to_os) {
3360 ret = bs->drv->bdrv_co_flush_to_os(bs);
3361 if (ret < 0) {
3362 return ret;
3363 }
3364 }
3365
Kevin Wolfca716362011-11-10 18:13:59 +01003366 /* But don't actually force it to the disk with cache=unsafe */
3367 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3368 return 0;
3369 }
3370
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003371 if (bs->drv->bdrv_co_flush_to_disk) {
Kevin Wolfc68b89a2011-11-10 17:25:44 +01003372 return bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003373 } else if (bs->drv->bdrv_aio_flush) {
3374 BlockDriverAIOCB *acb;
3375 CoroutineIOCompletion co = {
3376 .coroutine = qemu_coroutine_self(),
3377 };
3378
3379 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3380 if (acb == NULL) {
3381 return -EIO;
3382 } else {
3383 qemu_coroutine_yield();
3384 return co.ret;
3385 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003386 } else {
3387 /*
3388 * Some block drivers always operate in either writethrough or unsafe
3389 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3390 * know how the server works (because the behaviour is hardcoded or
3391 * depends on server-side configuration), so we can't ensure that
3392 * everything is safe on disk. Returning an error doesn't work because
3393 * that would break guests even if the server operates in writethrough
3394 * mode.
3395 *
3396 * Let's hope the user knows what he's doing.
3397 */
3398 return 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003399 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003400}
3401
Anthony Liguori0f154232011-11-14 15:09:45 -06003402void bdrv_invalidate_cache(BlockDriverState *bs)
3403{
3404 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3405 bs->drv->bdrv_invalidate_cache(bs);
3406 }
3407}
3408
3409void bdrv_invalidate_cache_all(void)
3410{
3411 BlockDriverState *bs;
3412
3413 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3414 bdrv_invalidate_cache(bs);
3415 }
3416}
3417
Paolo Bonzini07f07612011-10-17 12:32:12 +02003418int bdrv_flush(BlockDriverState *bs)
3419{
3420 Coroutine *co;
3421 RwCo rwco = {
3422 .bs = bs,
3423 .ret = NOT_DONE,
3424 };
3425
3426 if (qemu_in_coroutine()) {
3427 /* Fast-path if already in coroutine context */
3428 bdrv_flush_co_entry(&rwco);
3429 } else {
3430 co = qemu_coroutine_create(bdrv_flush_co_entry);
3431 qemu_coroutine_enter(co, &rwco);
3432 while (rwco.ret == NOT_DONE) {
3433 qemu_aio_wait();
3434 }
3435 }
3436
3437 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003438}
3439
Paolo Bonzini4265d622011-10-17 12:32:14 +02003440static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3441{
3442 RwCo *rwco = opaque;
3443
3444 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3445}
3446
3447int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3448 int nb_sectors)
3449{
3450 if (!bs->drv) {
3451 return -ENOMEDIUM;
3452 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3453 return -EIO;
3454 } else if (bs->read_only) {
3455 return -EROFS;
3456 } else if (bs->drv->bdrv_co_discard) {
3457 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3458 } else if (bs->drv->bdrv_aio_discard) {
3459 BlockDriverAIOCB *acb;
3460 CoroutineIOCompletion co = {
3461 .coroutine = qemu_coroutine_self(),
3462 };
3463
3464 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3465 bdrv_co_io_em_complete, &co);
3466 if (acb == NULL) {
3467 return -EIO;
3468 } else {
3469 qemu_coroutine_yield();
3470 return co.ret;
3471 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003472 } else {
3473 return 0;
3474 }
3475}
3476
3477int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3478{
3479 Coroutine *co;
3480 RwCo rwco = {
3481 .bs = bs,
3482 .sector_num = sector_num,
3483 .nb_sectors = nb_sectors,
3484 .ret = NOT_DONE,
3485 };
3486
3487 if (qemu_in_coroutine()) {
3488 /* Fast-path if already in coroutine context */
3489 bdrv_discard_co_entry(&rwco);
3490 } else {
3491 co = qemu_coroutine_create(bdrv_discard_co_entry);
3492 qemu_coroutine_enter(co, &rwco);
3493 while (rwco.ret == NOT_DONE) {
3494 qemu_aio_wait();
3495 }
3496 }
3497
3498 return rwco.ret;
3499}
3500
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003501/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003502/* removable device support */
3503
3504/**
3505 * Return TRUE if the media is present
3506 */
3507int bdrv_is_inserted(BlockDriverState *bs)
3508{
3509 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003510
bellard19cb3732006-08-19 11:45:59 +00003511 if (!drv)
3512 return 0;
3513 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003514 return 1;
3515 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003516}
3517
3518/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003519 * Return whether the media changed since the last call to this
3520 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003521 */
3522int bdrv_media_changed(BlockDriverState *bs)
3523{
3524 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003525
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003526 if (drv && drv->bdrv_media_changed) {
3527 return drv->bdrv_media_changed(bs);
3528 }
3529 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003530}
3531
3532/**
3533 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3534 */
Markus Armbrusterfdec4402011-09-06 18:58:45 +02003535void bdrv_eject(BlockDriverState *bs, int eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003536{
3537 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003538
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003539 if (drv && drv->bdrv_eject) {
3540 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003541 }
bellard19cb3732006-08-19 11:45:59 +00003542}
3543
bellard19cb3732006-08-19 11:45:59 +00003544/**
3545 * Lock or unlock the media (if it is locked, the user won't be able
3546 * to eject it manually).
3547 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003548void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003549{
3550 BlockDriver *drv = bs->drv;
3551
Markus Armbruster025e8492011-09-06 18:58:47 +02003552 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003553
Markus Armbruster025e8492011-09-06 18:58:47 +02003554 if (drv && drv->bdrv_lock_medium) {
3555 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003556 }
3557}
ths985a03b2007-12-24 16:10:43 +00003558
3559/* needed for generic scsi interface */
3560
3561int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3562{
3563 BlockDriver *drv = bs->drv;
3564
3565 if (drv && drv->bdrv_ioctl)
3566 return drv->bdrv_ioctl(bs, req, buf);
3567 return -ENOTSUP;
3568}
aliguori7d780662009-03-12 19:57:08 +00003569
aliguori221f7152009-03-28 17:28:41 +00003570BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3571 unsigned long int req, void *buf,
3572 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003573{
aliguori221f7152009-03-28 17:28:41 +00003574 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003575
aliguori221f7152009-03-28 17:28:41 +00003576 if (drv && drv->bdrv_aio_ioctl)
3577 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3578 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003579}
aliguorie268ca52009-04-22 20:20:00 +00003580
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003581void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3582{
3583 bs->buffer_alignment = align;
3584}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003585
aliguorie268ca52009-04-22 20:20:00 +00003586void *qemu_blockalign(BlockDriverState *bs, size_t size)
3587{
3588 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3589}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003590
3591void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3592{
3593 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003594
Liran Schouraaa0eb72010-01-26 10:31:48 +02003595 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003596 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003597 if (!bs->dirty_bitmap) {
3598 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3599 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3600 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003601
Anthony Liguori7267c092011-08-20 22:09:37 -05003602 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003603 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003604 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003605 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003606 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003607 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003608 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003609 }
3610}
3611
3612int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3613{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003614 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003615
Jan Kiszkac6d22832009-11-30 18:21:20 +01003616 if (bs->dirty_bitmap &&
3617 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003618 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3619 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003620 } else {
3621 return 0;
3622 }
3623}
3624
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003625void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3626 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003627{
3628 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3629}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003630
3631int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3632{
3633 return bs->dirty_count;
3634}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003635
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003636void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3637{
3638 assert(bs->in_use != in_use);
3639 bs->in_use = in_use;
3640}
3641
3642int bdrv_in_use(BlockDriverState *bs)
3643{
3644 return bs->in_use;
3645}
3646
Luiz Capitulino28a72822011-09-26 17:43:50 -03003647void bdrv_iostatus_enable(BlockDriverState *bs)
3648{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003649 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003650 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003651}
3652
3653/* The I/O status is only enabled if the drive explicitly
3654 * enables it _and_ the VM is configured to stop on errors */
3655bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3656{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003657 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003658 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3659 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3660 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3661}
3662
3663void bdrv_iostatus_disable(BlockDriverState *bs)
3664{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003665 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003666}
3667
3668void bdrv_iostatus_reset(BlockDriverState *bs)
3669{
3670 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003671 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003672 }
3673}
3674
3675/* XXX: Today this is set by device models because it makes the implementation
3676 quite simple. However, the block layer knows about the error, so it's
3677 possible to implement this without device models being involved */
3678void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3679{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003680 if (bdrv_iostatus_is_enabled(bs) &&
3681 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003682 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003683 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3684 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003685 }
3686}
3687
Christoph Hellwiga597e792011-08-25 08:26:01 +02003688void
3689bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3690 enum BlockAcctType type)
3691{
3692 assert(type < BDRV_MAX_IOTYPE);
3693
3694 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003695 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003696 cookie->type = type;
3697}
3698
3699void
3700bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3701{
3702 assert(cookie->type < BDRV_MAX_IOTYPE);
3703
3704 bs->nr_bytes[cookie->type] += cookie->bytes;
3705 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003706 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003707}
3708
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003709int bdrv_img_create(const char *filename, const char *fmt,
3710 const char *base_filename, const char *base_fmt,
3711 char *options, uint64_t img_size, int flags)
3712{
3713 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003714 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003715 BlockDriverState *bs = NULL;
3716 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003717 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003718 int ret = 0;
3719
3720 /* Find driver and parse its options */
3721 drv = bdrv_find_format(fmt);
3722 if (!drv) {
3723 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003724 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003725 goto out;
3726 }
3727
3728 proto_drv = bdrv_find_protocol(filename);
3729 if (!proto_drv) {
3730 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003731 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003732 goto out;
3733 }
3734
3735 create_options = append_option_parameters(create_options,
3736 drv->create_options);
3737 create_options = append_option_parameters(create_options,
3738 proto_drv->create_options);
3739
3740 /* Create parameter list with default values */
3741 param = parse_option_parameters("", create_options, param);
3742
3743 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3744
3745 /* Parse -o options */
3746 if (options) {
3747 param = parse_option_parameters(options, create_options, param);
3748 if (param == NULL) {
3749 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003750 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003751 goto out;
3752 }
3753 }
3754
3755 if (base_filename) {
3756 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3757 base_filename)) {
3758 error_report("Backing file not supported for file format '%s'",
3759 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003760 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003761 goto out;
3762 }
3763 }
3764
3765 if (base_fmt) {
3766 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3767 error_report("Backing file format not supported for file "
3768 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003769 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003770 goto out;
3771 }
3772 }
3773
Jes Sorensen792da932010-12-16 13:52:17 +01003774 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3775 if (backing_file && backing_file->value.s) {
3776 if (!strcmp(filename, backing_file->value.s)) {
3777 error_report("Error: Trying to create an image with the "
3778 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003779 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003780 goto out;
3781 }
3782 }
3783
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003784 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3785 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003786 backing_drv = bdrv_find_format(backing_fmt->value.s);
3787 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003788 error_report("Unknown backing file format '%s'",
3789 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003790 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003791 goto out;
3792 }
3793 }
3794
3795 // The size for the image must always be specified, with one exception:
3796 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02003797 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3798 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003799 if (backing_file && backing_file->value.s) {
3800 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003801 char buf[32];
3802
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003803 bs = bdrv_new("");
3804
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003805 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003806 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003807 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003808 goto out;
3809 }
3810 bdrv_get_geometry(bs, &size);
3811 size *= 512;
3812
3813 snprintf(buf, sizeof(buf), "%" PRId64, size);
3814 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3815 } else {
3816 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003817 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003818 goto out;
3819 }
3820 }
3821
3822 printf("Formatting '%s', fmt=%s ", filename, fmt);
3823 print_option_parameters(param);
3824 puts("");
3825
3826 ret = bdrv_create(drv, filename, param);
3827
3828 if (ret < 0) {
3829 if (ret == -ENOTSUP) {
3830 error_report("Formatting or formatting option not supported for "
3831 "file format '%s'", fmt);
3832 } else if (ret == -EFBIG) {
3833 error_report("The image size is too large for file format '%s'",
3834 fmt);
3835 } else {
3836 error_report("%s: error while creating %s: %s", filename, fmt,
3837 strerror(-ret));
3838 }
3839 }
3840
3841out:
3842 free_option_parameters(create_options);
3843 free_option_parameters(param);
3844
3845 if (bs) {
3846 bdrv_delete(bs);
3847 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01003848
3849 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003850}