blob: 42bd308639704d4aa2c8e96d4a81f8b655321139 [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010049#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020051static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000052static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
53 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000054 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000055static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
56 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000057 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020058static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
59 int64_t sector_num, int nb_sectors,
60 QEMUIOVector *iov);
61static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
62 int64_t sector_num, int nb_sectors,
63 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010064static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010066static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010068static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
69 int64_t sector_num,
70 QEMUIOVector *qiov,
71 int nb_sectors,
72 BlockDriverCompletionFunc *cb,
73 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010074 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000076
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080077static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
78 bool is_write, double elapsed_time, uint64_t *wait);
79static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
80 double elapsed_time, uint64_t *wait);
81static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
82 bool is_write, int64_t *wait);
83
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010084static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
85 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000086
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010087static QLIST_HEAD(, BlockDriver) bdrv_drivers =
88 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000089
Markus Armbrusterf9092b12010-06-25 10:33:39 +020090/* The device to use for VM snapshots */
91static BlockDriverState *bs_snapshots;
92
Markus Armbrustereb852012009-10-27 18:41:44 +010093/* If non-zero, use only whitelisted block drivers */
94static int use_bdrv_whitelist;
95
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +000096#ifdef _WIN32
/*
 * Return 1 if filename begins with an ASCII letter immediately followed by
 * a colon (a drive-letter prefix such as "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    /* Only look at the second character once the first one is a letter */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
103
104int is_windows_drive(const char *filename)
105{
106 if (is_windows_drive_prefix(filename) &&
107 filename[2] == '\0')
108 return 1;
109 if (strstart(filename, "\\\\.\\", NULL) ||
110 strstart(filename, "//./", NULL))
111 return 1;
112 return 0;
113}
114#endif
115
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800116/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800117void bdrv_io_limits_disable(BlockDriverState *bs)
118{
119 bs->io_limits_enabled = false;
120
121 while (qemu_co_queue_next(&bs->throttled_reqs));
122
123 if (bs->block_timer) {
124 qemu_del_timer(bs->block_timer);
125 qemu_free_timer(bs->block_timer);
126 bs->block_timer = NULL;
127 }
128
129 bs->slice_start = 0;
130 bs->slice_end = 0;
131 bs->slice_time = 0;
132 memset(&bs->io_base, 0, sizeof(bs->io_base));
133}
134
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800135static void bdrv_block_timer(void *opaque)
136{
137 BlockDriverState *bs = opaque;
138
139 qemu_co_queue_next(&bs->throttled_reqs);
140}
141
142void bdrv_io_limits_enable(BlockDriverState *bs)
143{
144 qemu_co_queue_init(&bs->throttled_reqs);
145 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
146 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
147 bs->slice_start = qemu_get_clock_ns(vm_clock);
148 bs->slice_end = bs->slice_start + bs->slice_time;
149 memset(&bs->io_base, 0, sizeof(bs->io_base));
150 bs->io_limits_enabled = true;
151}
152
153bool bdrv_io_limits_enabled(BlockDriverState *bs)
154{
155 BlockIOLimit *io_limits = &bs->io_limits;
156 return io_limits->bps[BLOCK_IO_LIMIT_READ]
157 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
158 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
159 || io_limits->iops[BLOCK_IO_LIMIT_READ]
160 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
161 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
162}
163
/*
 * Throttling gate for one request on bs.
 *
 * @bs:         device the request is issued against
 * @is_write:   true for a write request, false for a read
 * @nb_sectors: size of the request in sectors
 *
 * Blocks the caller on bs->throttled_reqs until bdrv_exceed_io_limits()
 * no longer reports the request as over the limits.
 * NOTE(review): the qemu_co_queue_wait*() calls presumably require coroutine
 * context although the function is not marked coroutine_fn -- confirm.
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    /* Other requests are already queued: line up behind them first */
    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* In fact, we hope to keep each request's timing, in FIFO mode. The next
     * throttled requests will not be dequeued until the current request is
     * allowed to be serviced. So if the current request still exceeds the
     * limits, it will be inserted to the head. All requests followed it will
     * be still in throttled_reqs queue.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        /* Arm the timer wait_time ns from now (vm_clock); its callback,
         * bdrv_block_timer(), calls qemu_co_queue_next() on this queue. */
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    /* This request may proceed; pass the queue on to the next waiter */
    qemu_co_queue_next(&bs->throttled_reqs);
}
188
/*
 * Check whether path carries a "<protocol>:" part.
 *
 * Returns 1 if path contains a ':' character anywhere, 0 otherwise. On
 * Windows, drive names and paths with a drive-letter prefix are never
 * treated as having a protocol.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive_prefix(path) || is_windows_drive(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') ? 1 : 0;
}
201
/*
 * Return 1 if path is absolute, 0 otherwise.
 *
 * The part of path after its first ':' (or the whole path when there is no
 * ':') must start with '/'. On Windows a backslash is accepted as well, and
 * a path whose very first character is a slash or backslash is absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
221
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path designated by filename.
 *
 * If filename is absolute it is copied as is. Otherwise it is appended to
 * the directory part of base_path, i.e. everything up to and including the
 * last path separator, or up to and including the first ':' when that comes
 * later (so "proto:" prefixes of URLs are kept).
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *dir_end, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just after the first ':' of base_path, if any */
    dir_end = strchr(base_path, ':');
    dir_end = dir_end ? dir_end + 1 : base_path;

    /* position just after the last path separator, if any */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash;
        backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever of the two reaches further into base_path */
    if (after_sep > dir_end) {
        dir_end = after_sep;
    }

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
265
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500266void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000267{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100268 /* Block drivers without coroutine functions need emulation */
269 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200270 bdrv->bdrv_co_readv = bdrv_co_readv_em;
271 bdrv->bdrv_co_writev = bdrv_co_writev_em;
272
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100273 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
274 * the block driver lacks aio we need to emulate that too.
275 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200276 if (!bdrv->bdrv_aio_readv) {
277 /* add AIO emulation layer */
278 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
279 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200280 }
bellard83f64092006-08-01 16:21:11 +0000281 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200282
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100283 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000284}
bellardb3380822004-03-14 21:38:54 +0000285
286/* create a new block device (by default it is empty) */
287BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000288{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100289 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000290
Anthony Liguori7267c092011-08-20 22:09:37 -0500291 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000292 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000293 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100294 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000295 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300296 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000297 return bs;
298}
299
bellardea2384d2004-08-01 21:59:26 +0000300BlockDriver *bdrv_find_format(const char *format_name)
301{
302 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100303 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
304 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000305 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100306 }
bellardea2384d2004-08-01 21:59:26 +0000307 }
308 return NULL;
309}
310
Markus Armbrustereb852012009-10-27 18:41:44 +0100311static int bdrv_is_whitelisted(BlockDriver *drv)
312{
313 static const char *whitelist[] = {
314 CONFIG_BDRV_WHITELIST
315 };
316 const char **p;
317
318 if (!whitelist[0])
319 return 1; /* no whitelist, anything goes */
320
321 for (p = whitelist; *p; p++) {
322 if (!strcmp(drv->format_name, *p)) {
323 return 1;
324 }
325 }
326 return 0;
327}
328
329BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
330{
331 BlockDriver *drv = bdrv_find_format(format_name);
332 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
333}
334
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200335int bdrv_create(BlockDriver *drv, const char* filename,
336 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000337{
338 if (!drv->bdrv_create)
339 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200340
341 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000342}
343
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200344int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
345{
346 BlockDriver *drv;
347
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900348 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200349 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000350 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351 }
352
353 return bdrv_create(drv, filename, options);
354}
355
bellardd5249392004-08-03 21:14:23 +0000356#ifdef _WIN32
bellard95389c82005-12-18 18:28:15 +0000357void get_tmp_filename(char *filename, int size)
bellardd5249392004-08-03 21:14:23 +0000358{
bellard3b9f94e2007-01-07 17:27:07 +0000359 char temp_dir[MAX_PATH];
ths3b46e622007-09-17 08:09:54 +0000360
bellard3b9f94e2007-01-07 17:27:07 +0000361 GetTempPath(MAX_PATH, temp_dir);
362 GetTempFileName(temp_dir, "qem", 0, filename);
bellardd5249392004-08-03 21:14:23 +0000363}
364#else
/*
 * Create an empty temporary file in $TMPDIR (or /tmp when TMPDIR is unset)
 * and store its name in filename.
 *
 * @filename: output buffer
 * @size:     size of filename in bytes
 *
 * On failure (for instance when the buffer is too small to hold the
 * template, or mkstemp() fails) filename is set to the empty string so that
 * callers do not go on to use an unexpanded "vl.XXXXXX" template.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }

    /* XXX: race condition possible: the file is closed again and only its
     * name is handed back to the caller */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    if (fd < 0) {
        /* never close(-1), and do not leak the template to the caller */
        filename[0] = '\0';
        return;
    }
    close(fd);
}
bellardd5249392004-08-03 21:14:23 +0000377#endif
bellardea2384d2004-08-01 21:59:26 +0000378
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200379/*
380 * Detect host devices. By convention, /dev/cdrom[N] is always
381 * recognized as a host CDROM.
382 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200383static BlockDriver *find_hdev_driver(const char *filename)
384{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200385 int score_max = 0, score;
386 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200387
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100388 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200389 if (d->bdrv_probe_device) {
390 score = d->bdrv_probe_device(filename);
391 if (score > score_max) {
392 score_max = score;
393 drv = d;
394 }
395 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200396 }
397
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200398 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200399}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200400
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900401BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200402{
403 BlockDriver *drv1;
404 char protocol[128];
405 int len;
406 const char *p;
407
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200408 /* TODO Drivers without bdrv_file_open must be specified explicitly */
409
Christoph Hellwig39508e72010-06-23 12:25:17 +0200410 /*
411 * XXX(hch): we really should not let host device detection
412 * override an explicit protocol specification, but moving this
413 * later breaks access to device names with colons in them.
414 * Thanks to the brain-dead persistent naming schemes on udev-
415 * based Linux systems those actually are quite common.
416 */
417 drv1 = find_hdev_driver(filename);
418 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200419 return drv1;
420 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200421
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000422 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200423 return bdrv_find_format("file");
424 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000425 p = strchr(filename, ':');
426 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200427 len = p - filename;
428 if (len > sizeof(protocol) - 1)
429 len = sizeof(protocol) - 1;
430 memcpy(protocol, filename, len);
431 protocol[len] = '\0';
432 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
433 if (drv1->protocol_name &&
434 !strcmp(drv1->protocol_name, protocol)) {
435 return drv1;
436 }
437 }
438 return NULL;
439}
440
Stefan Weilc98ac352010-07-21 21:51:51 +0200441static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000442{
bellard83f64092006-08-01 16:21:11 +0000443 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000444 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000445 uint8_t buf[2048];
446 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000447
Naphtali Spreif5edb012010-01-17 16:48:13 +0200448 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200449 if (ret < 0) {
450 *pdrv = NULL;
451 return ret;
452 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700453
Kevin Wolf08a00552010-06-01 18:37:31 +0200454 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
455 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700456 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200457 drv = bdrv_find_format("raw");
458 if (!drv) {
459 ret = -ENOENT;
460 }
461 *pdrv = drv;
462 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700464
bellard83f64092006-08-01 16:21:11 +0000465 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
466 bdrv_delete(bs);
467 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200468 *pdrv = NULL;
469 return ret;
bellard83f64092006-08-01 16:21:11 +0000470 }
471
bellardea2384d2004-08-01 21:59:26 +0000472 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200473 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100474 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000475 if (drv1->bdrv_probe) {
476 score = drv1->bdrv_probe(buf, ret, filename);
477 if (score > score_max) {
478 score_max = score;
479 drv = drv1;
480 }
bellardea2384d2004-08-01 21:59:26 +0000481 }
482 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200483 if (!drv) {
484 ret = -ENOENT;
485 }
486 *pdrv = drv;
487 return ret;
bellardea2384d2004-08-01 21:59:26 +0000488}
489
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100490/**
491 * Set the current 'total_sectors' value
492 */
493static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
494{
495 BlockDriver *drv = bs->drv;
496
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700497 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
498 if (bs->sg)
499 return 0;
500
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100501 /* query actual device if possible, otherwise just trust the hint */
502 if (drv->bdrv_getlength) {
503 int64_t length = drv->bdrv_getlength(bs);
504 if (length < 0) {
505 return length;
506 }
507 hint = length >> BDRV_SECTOR_BITS;
508 }
509
510 bs->total_sectors = hint;
511 return 0;
512}
513
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100514/**
515 * Set open flags for a given cache mode
516 *
517 * Return 0 on success, -1 if the cache mode was invalid.
518 */
519int bdrv_parse_cache_flags(const char *mode, int *flags)
520{
521 *flags &= ~BDRV_O_CACHE_MASK;
522
523 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
524 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100525 } else if (!strcmp(mode, "directsync")) {
526 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100527 } else if (!strcmp(mode, "writeback")) {
528 *flags |= BDRV_O_CACHE_WB;
529 } else if (!strcmp(mode, "unsafe")) {
530 *flags |= BDRV_O_CACHE_WB;
531 *flags |= BDRV_O_NO_FLUSH;
532 } else if (!strcmp(mode, "writethrough")) {
533 /* this is the default */
534 } else {
535 return -1;
536 }
537
538 return 0;
539}
540
/*
 * Common part for opening disk images and files
 *
 * @bs:       state to initialise; bs->is_temporary is read, most other
 *            fields touched here are reset
 * @filename: name of the image or file to open
 * @flags:    BDRV_O_* open flags as passed by the caller
 * @drv:      driver to open the image with (must not be NULL)
 *
 * Returns 0 on success. On failure returns a negative errno value
 * (-ENOTSUP if whitelisting is active and drv is not whitelisted) and
 * leaves bs->drv and bs->opaque NULL.
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    /* Reset the per-image state before the driver fills it in */
    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    /* Nothing has been allocated yet, so a plain return is enough here */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        /* Open the underlying file first, then let the driver open the
         * image on top of bs->file */
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    /* NOTE(review): if this fails after the driver opened successfully, the
     * cleanup below does not call drv->bdrv_close() -- confirm whether
     * driver-side resources can leak here. */
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    /* Remove the name of a temporary image as soon as it is open; on
     * Windows the unlink is done in bdrv_close() instead */
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
625
626/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200627 * Opens a file using a protocol (file, host_device, nbd, ...)
628 */
bellard83f64092006-08-01 16:21:11 +0000629int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000630{
bellard83f64092006-08-01 16:21:11 +0000631 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200632 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000633 int ret;
634
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900635 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200636 if (!drv) {
637 return -ENOENT;
638 }
639
bellard83f64092006-08-01 16:21:11 +0000640 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200641 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000642 if (ret < 0) {
643 bdrv_delete(bs);
644 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000645 }
aliguori71d07702009-03-03 17:37:16 +0000646 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000647 *pbs = bs;
648 return 0;
bellardea2384d2004-08-01 21:59:26 +0000649}
bellardfc01f7e2003-06-30 10:03:06 +0000650
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200651/*
652 * Opens a disk image (raw, qcow2, vmdk, ...)
653 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200654int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
655 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000656{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200657 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200658 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000659
bellard83f64092006-08-01 16:21:11 +0000660 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000661 BlockDriverState *bs1;
662 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000663 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200664 BlockDriver *bdrv_qcow2;
665 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200666 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000667
bellardea2384d2004-08-01 21:59:26 +0000668 /* if snapshot, we create a temporary backing file and open it
669 instead of opening 'filename' directly */
670
671 /* if there is a backing file, use it */
672 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200673 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000674 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000675 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000676 return ret;
bellardea2384d2004-08-01 21:59:26 +0000677 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200678 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000679
680 if (bs1->drv && bs1->drv->protocol_name)
681 is_protocol = 1;
682
bellardea2384d2004-08-01 21:59:26 +0000683 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000684
bellardea2384d2004-08-01 21:59:26 +0000685 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000686
687 /* Real path is meaningless for protocols */
688 if (is_protocol)
689 snprintf(backing_filename, sizeof(backing_filename),
690 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000691 else if (!realpath(filename, backing_filename))
692 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000693
Kevin Wolf91a073a2009-05-27 14:48:06 +0200694 bdrv_qcow2 = bdrv_find_format("qcow2");
695 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
696
Jes Sorensen3e829902010-05-27 16:20:30 +0200697 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200698 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
699 if (drv) {
700 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
701 drv->format_name);
702 }
703
704 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200705 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000706 if (ret < 0) {
707 return ret;
bellardea2384d2004-08-01 21:59:26 +0000708 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200709
bellardea2384d2004-08-01 21:59:26 +0000710 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200711 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000712 bs->is_temporary = 1;
713 }
bellard712e7872005-04-28 21:09:32 +0000714
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200715 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200716 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200717 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000718 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100719
aliguori51d7c002009-03-05 23:00:29 +0000720 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000721 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000722 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200723
724 /* Open the image */
725 ret = bdrv_open_common(bs, filename, flags, drv);
726 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100727 goto unlink_and_fail;
728 }
729
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200730 /* If there is a backing file, use it */
731 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
732 char backing_filename[PATH_MAX];
733 int back_flags;
734 BlockDriver *back_drv = NULL;
735
736 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000737
738 if (path_has_protocol(bs->backing_file)) {
739 pstrcpy(backing_filename, sizeof(backing_filename),
740 bs->backing_file);
741 } else {
742 path_combine(backing_filename, sizeof(backing_filename),
743 filename, bs->backing_file);
744 }
745
746 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200747 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000748 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200749
750 /* backing files always opened read-only */
751 back_flags =
752 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
753
754 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
755 if (ret < 0) {
756 bdrv_close(bs);
757 return ret;
758 }
759 if (bs->is_temporary) {
760 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
761 } else {
762 /* base image inherits from "parent" */
763 bs->backing_hd->keep_read_only = bs->keep_read_only;
764 }
765 }
766
767 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200768 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200769 }
770
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800771 /* throttling disk I/O limits */
772 if (bs->io_limits_enabled) {
773 bdrv_io_limits_enable(bs);
774 }
775
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200776 return 0;
777
778unlink_and_fail:
779 if (bs->is_temporary) {
780 unlink(filename);
781 }
782 return ret;
783}
784
bellardfc01f7e2003-06-30 10:03:06 +0000785void bdrv_close(BlockDriverState *bs)
786{
bellard19cb3732006-08-19 11:45:59 +0000787 if (bs->drv) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200788 if (bs == bs_snapshots) {
789 bs_snapshots = NULL;
790 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100791 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000792 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100793 bs->backing_hd = NULL;
794 }
bellardea2384d2004-08-01 21:59:26 +0000795 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500796 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000797#ifdef _WIN32
798 if (bs->is_temporary) {
799 unlink(bs->filename);
800 }
bellard67b915a2004-03-31 23:37:16 +0000801#endif
bellardea2384d2004-08-01 21:59:26 +0000802 bs->opaque = NULL;
803 bs->drv = NULL;
bellardb3380822004-03-14 21:38:54 +0000804
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200805 if (bs->file != NULL) {
806 bdrv_close(bs->file);
807 }
808
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200809 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000810 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800811
812 /*throttling disk I/O limits*/
813 if (bs->io_limits_enabled) {
814 bdrv_io_limits_disable(bs);
815 }
bellardb3380822004-03-14 21:38:54 +0000816}
817
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900818void bdrv_close_all(void)
819{
820 BlockDriverState *bs;
821
822 QTAILQ_FOREACH(bs, &bdrv_states, list) {
823 bdrv_close(bs);
824 }
825}
826
Ryan Harperd22b2f42011-03-29 20:51:47 -0500827/* make a BlockDriverState anonymous by removing from bdrv_state list.
828 Also, NULL terminate the device_name to prevent double remove */
829void bdrv_make_anon(BlockDriverState *bs)
830{
831 if (bs->device_name[0] != '\0') {
832 QTAILQ_REMOVE(&bdrv_states, bs, list);
833 }
834 bs->device_name[0] = '\0';
835}
836
bellardb3380822004-03-14 21:38:54 +0000837void bdrv_delete(BlockDriverState *bs)
838{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200839 assert(!bs->dev);
Markus Armbruster18846de2010-06-29 16:58:30 +0200840
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100841 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500842 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000843
bellardb3380822004-03-14 21:38:54 +0000844 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200845 if (bs->file != NULL) {
846 bdrv_delete(bs->file);
847 }
848
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200849 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500850 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000851}
852
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200853int bdrv_attach_dev(BlockDriverState *bs, void *dev)
854/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200855{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200856 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +0200857 return -EBUSY;
858 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200859 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -0300860 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +0200861 return 0;
862}
863
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200864/* TODO qdevified devices don't use this, remove when devices are qdevified */
865void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +0200866{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200867 if (bdrv_attach_dev(bs, dev) < 0) {
868 abort();
869 }
870}
871
872void bdrv_detach_dev(BlockDriverState *bs, void *dev)
873/* TODO change to DeviceState *dev when all users are qdevified */
874{
875 assert(bs->dev == dev);
876 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +0200877 bs->dev_ops = NULL;
878 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +0200879 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +0200880}
881
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200882/* TODO change to return DeviceState * when all users are qdevified */
883void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +0200884{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200885 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +0200886}
887
Markus Armbruster0e49de52011-08-03 15:07:41 +0200888void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
889 void *opaque)
890{
891 bs->dev_ops = ops;
892 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200893 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
894 bs_snapshots = NULL;
895 }
Markus Armbruster0e49de52011-08-03 15:07:41 +0200896}
897
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200898static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +0200899{
Markus Armbruster145feb12011-08-03 15:07:42 +0200900 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200901 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Markus Armbruster145feb12011-08-03 15:07:42 +0200902 }
903}
904
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200905bool bdrv_dev_has_removable_media(BlockDriverState *bs)
906{
907 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
908}
909
Paolo Bonzini025ccaa2011-11-07 17:50:13 +0100910void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
911{
912 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
913 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
914 }
915}
916
Markus Armbrustere4def802011-09-06 18:58:53 +0200917bool bdrv_dev_is_tray_open(BlockDriverState *bs)
918{
919 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
920 return bs->dev_ops->is_tray_open(bs->dev_opaque);
921 }
922 return false;
923}
924
Markus Armbruster145feb12011-08-03 15:07:42 +0200925static void bdrv_dev_resize_cb(BlockDriverState *bs)
926{
927 if (bs->dev_ops && bs->dev_ops->resize_cb) {
928 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +0200929 }
930}
931
Markus Armbrusterf1076392011-09-06 18:58:46 +0200932bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
933{
934 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
935 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
936 }
937 return false;
938}
939
aliguorie97fc192009-04-21 23:11:50 +0000940/*
941 * Run consistency checks on an image
942 *
Kevin Wolfe076f332010-06-29 11:43:13 +0200943 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +0200944 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +0200945 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +0000946 */
Kevin Wolfe076f332010-06-29 11:43:13 +0200947int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +0000948{
949 if (bs->drv->bdrv_check == NULL) {
950 return -ENOTSUP;
951 }
952
Kevin Wolfe076f332010-06-29 11:43:13 +0200953 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +0200954 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +0000955}
956
Kevin Wolf8a426612010-07-16 17:17:01 +0200957#define COMMIT_BUF_SECTORS 2048
958
bellard33e39632003-07-06 17:15:21 +0000959/* commit COW file into the raw image */
960int bdrv_commit(BlockDriverState *bs)
961{
bellard19cb3732006-08-19 11:45:59 +0000962 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +0200963 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +0200964 int64_t sector, total_sectors;
965 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200966 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +0200967 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200968 char filename[1024];
969 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +0000970
bellard19cb3732006-08-19 11:45:59 +0000971 if (!drv)
972 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200973
974 if (!bs->backing_hd) {
975 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +0000976 }
977
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200978 if (bs->backing_hd->keep_read_only) {
979 return -EACCES;
980 }
Kevin Wolfee181192010-08-05 13:05:22 +0200981
982 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200983 ro = bs->backing_hd->read_only;
984 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
985 open_flags = bs->backing_hd->open_flags;
986
987 if (ro) {
988 /* re-open as RW */
989 bdrv_delete(bs->backing_hd);
990 bs->backing_hd = NULL;
991 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +0200992 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
993 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +0200994 if (rw_ret < 0) {
995 bdrv_delete(bs_rw);
996 /* try to re-open read-only */
997 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +0200998 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
999 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001000 if (ret < 0) {
1001 bdrv_delete(bs_ro);
1002 /* drive not functional anymore */
1003 bs->drv = NULL;
1004 return ret;
1005 }
1006 bs->backing_hd = bs_ro;
1007 return rw_ret;
1008 }
1009 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001010 }
bellardea2384d2004-08-01 21:59:26 +00001011
Jan Kiszka6ea44302009-11-30 18:21:19 +01001012 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001013 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001014
Kevin Wolf8a426612010-07-16 17:17:01 +02001015 for (sector = 0; sector < total_sectors; sector += n) {
1016 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
1017
1018 if (bdrv_read(bs, sector, buf, n) != 0) {
1019 ret = -EIO;
1020 goto ro_cleanup;
1021 }
1022
1023 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1024 ret = -EIO;
1025 goto ro_cleanup;
1026 }
bellardea2384d2004-08-01 21:59:26 +00001027 }
1028 }
bellard95389c82005-12-18 18:28:15 +00001029
Christoph Hellwig1d449522010-01-17 12:32:30 +01001030 if (drv->bdrv_make_empty) {
1031 ret = drv->bdrv_make_empty(bs);
1032 bdrv_flush(bs);
1033 }
bellard95389c82005-12-18 18:28:15 +00001034
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001035 /*
1036 * Make sure all data we wrote to the backing device is actually
1037 * stable on disk.
1038 */
1039 if (bs->backing_hd)
1040 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001041
1042ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001043 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001044
1045 if (ro) {
1046 /* re-open as RO */
1047 bdrv_delete(bs->backing_hd);
1048 bs->backing_hd = NULL;
1049 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001050 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1051 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001052 if (ret < 0) {
1053 bdrv_delete(bs_ro);
1054 /* drive not functional anymore */
1055 bs->drv = NULL;
1056 return ret;
1057 }
1058 bs->backing_hd = bs_ro;
1059 bs->backing_hd->keep_read_only = 0;
1060 }
1061
Christoph Hellwig1d449522010-01-17 12:32:30 +01001062 return ret;
bellard33e39632003-07-06 17:15:21 +00001063}
1064
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001065void bdrv_commit_all(void)
1066{
1067 BlockDriverState *bs;
1068
1069 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1070 bdrv_commit(bs);
1071 }
1072}
1073
Kevin Wolf756e6732010-01-12 12:55:17 +01001074/*
1075 * Return values:
1076 * 0 - success
1077 * -EINVAL - backing format specified, but no file
1078 * -ENOSPC - can't update the backing file because no space is left in the
1079 * image file header
1080 * -ENOTSUP - format driver doesn't support changing the backing file
1081 */
1082int bdrv_change_backing_file(BlockDriverState *bs,
1083 const char *backing_file, const char *backing_fmt)
1084{
1085 BlockDriver *drv = bs->drv;
1086
1087 if (drv->bdrv_change_backing_file != NULL) {
1088 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1089 } else {
1090 return -ENOTSUP;
1091 }
1092}
1093
aliguori71d07702009-03-03 17:37:16 +00001094static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1095 size_t size)
1096{
1097 int64_t len;
1098
1099 if (!bdrv_is_inserted(bs))
1100 return -ENOMEDIUM;
1101
1102 if (bs->growable)
1103 return 0;
1104
1105 len = bdrv_getlength(bs);
1106
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001107 if (offset < 0)
1108 return -EIO;
1109
1110 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001111 return -EIO;
1112
1113 return 0;
1114}
1115
1116static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1117 int nb_sectors)
1118{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001119 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1120 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001121}
1122
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001123typedef struct RwCo {
1124 BlockDriverState *bs;
1125 int64_t sector_num;
1126 int nb_sectors;
1127 QEMUIOVector *qiov;
1128 bool is_write;
1129 int ret;
1130} RwCo;
1131
1132static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1133{
1134 RwCo *rwco = opaque;
1135
1136 if (!rwco->is_write) {
1137 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1138 rwco->nb_sectors, rwco->qiov);
1139 } else {
1140 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1141 rwco->nb_sectors, rwco->qiov);
1142 }
1143}
1144
1145/*
1146 * Process a synchronous request using coroutines
1147 */
1148static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1149 int nb_sectors, bool is_write)
1150{
1151 QEMUIOVector qiov;
1152 struct iovec iov = {
1153 .iov_base = (void *)buf,
1154 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1155 };
1156 Coroutine *co;
1157 RwCo rwco = {
1158 .bs = bs,
1159 .sector_num = sector_num,
1160 .nb_sectors = nb_sectors,
1161 .qiov = &qiov,
1162 .is_write = is_write,
1163 .ret = NOT_DONE,
1164 };
1165
1166 qemu_iovec_init_external(&qiov, &iov, 1);
1167
1168 if (qemu_in_coroutine()) {
1169 /* Fast-path if already in coroutine context */
1170 bdrv_rw_co_entry(&rwco);
1171 } else {
1172 co = qemu_coroutine_create(bdrv_rw_co_entry);
1173 qemu_coroutine_enter(co, &rwco);
1174 while (rwco.ret == NOT_DONE) {
1175 qemu_aio_wait();
1176 }
1177 }
1178 return rwco.ret;
1179}
1180
bellard19cb3732006-08-19 11:45:59 +00001181/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001182int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001183 uint8_t *buf, int nb_sectors)
1184{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001185 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001186}
1187
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001188static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001189 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001190{
1191 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001192 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001193
Jan Kiszka6ea44302009-11-30 18:21:19 +01001194 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001195 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001196
1197 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001198 idx = start / (sizeof(unsigned long) * 8);
1199 bit = start % (sizeof(unsigned long) * 8);
1200 val = bs->dirty_bitmap[idx];
1201 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001202 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001203 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001204 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001205 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001206 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001207 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001208 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001209 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001210 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001211 }
1212 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001213 }
1214}
1215
ths5fafdf22007-09-16 21:08:06 +00001216/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001217 -EIO generic I/O error (may happen for all errors)
1218 -ENOMEDIUM No media inserted.
1219 -EINVAL Invalid sector number or nb_sectors
1220 -EACCES Trying to write a read-only device
1221*/
ths5fafdf22007-09-16 21:08:06 +00001222int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001223 const uint8_t *buf, int nb_sectors)
1224{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001225 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001226}
1227
aliguorieda578e2009-03-12 19:57:16 +00001228int bdrv_pread(BlockDriverState *bs, int64_t offset,
1229 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001230{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001231 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001232 int len, nb_sectors, count;
1233 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001234 int ret;
bellard83f64092006-08-01 16:21:11 +00001235
1236 count = count1;
1237 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001238 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001239 if (len > count)
1240 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001241 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001242 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001243 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1244 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001245 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001246 count -= len;
1247 if (count == 0)
1248 return count1;
1249 sector_num++;
1250 buf += len;
1251 }
1252
1253 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001254 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001255 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001256 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1257 return ret;
bellard83f64092006-08-01 16:21:11 +00001258 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001259 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001260 buf += len;
1261 count -= len;
1262 }
1263
1264 /* add data from the last sector */
1265 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001266 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1267 return ret;
bellard83f64092006-08-01 16:21:11 +00001268 memcpy(buf, tmp_buf, count);
1269 }
1270 return count1;
1271}
1272
aliguorieda578e2009-03-12 19:57:16 +00001273int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1274 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001275{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001276 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001277 int len, nb_sectors, count;
1278 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001279 int ret;
bellard83f64092006-08-01 16:21:11 +00001280
1281 count = count1;
1282 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001283 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001284 if (len > count)
1285 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001286 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001287 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001288 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1289 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001290 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001291 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1292 return ret;
bellard83f64092006-08-01 16:21:11 +00001293 count -= len;
1294 if (count == 0)
1295 return count1;
1296 sector_num++;
1297 buf += len;
1298 }
1299
1300 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001301 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001302 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001303 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1304 return ret;
bellard83f64092006-08-01 16:21:11 +00001305 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001306 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001307 buf += len;
1308 count -= len;
1309 }
1310
1311 /* add data from the last sector */
1312 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001313 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1314 return ret;
bellard83f64092006-08-01 16:21:11 +00001315 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001316 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1317 return ret;
bellard83f64092006-08-01 16:21:11 +00001318 }
1319 return count1;
1320}
bellard83f64092006-08-01 16:21:11 +00001321
Kevin Wolff08145f2010-06-16 16:38:15 +02001322/*
1323 * Writes to the file and ensures that no writes are reordered across this
1324 * request (acts as a barrier)
1325 *
1326 * Returns 0 on success, -errno in error cases.
1327 */
1328int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1329 const void *buf, int count)
1330{
1331 int ret;
1332
1333 ret = bdrv_pwrite(bs, offset, buf, count);
1334 if (ret < 0) {
1335 return ret;
1336 }
1337
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001338 /* No flush needed for cache modes that use O_DSYNC */
1339 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001340 bdrv_flush(bs);
1341 }
1342
1343 return 0;
1344}
1345
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001346/*
1347 * Handle a read request in coroutine context
1348 */
1349static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1350 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001351{
1352 BlockDriver *drv = bs->drv;
1353
Kevin Wolfda1fa912011-07-14 17:27:13 +02001354 if (!drv) {
1355 return -ENOMEDIUM;
1356 }
1357 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1358 return -EIO;
1359 }
1360
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001361 /* throttling disk read I/O */
1362 if (bs->io_limits_enabled) {
1363 bdrv_io_limits_intercept(bs, false, nb_sectors);
1364 }
1365
Kevin Wolfda1fa912011-07-14 17:27:13 +02001366 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1367}
1368
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001369int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001370 int nb_sectors, QEMUIOVector *qiov)
1371{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001372 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001373
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001374 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1375}
1376
1377/*
1378 * Handle a write request in coroutine context
1379 */
1380static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1381 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1382{
1383 BlockDriver *drv = bs->drv;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001384 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001385
1386 if (!bs->drv) {
1387 return -ENOMEDIUM;
1388 }
1389 if (bs->read_only) {
1390 return -EACCES;
1391 }
1392 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1393 return -EIO;
1394 }
1395
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001396 /* throttling disk write I/O */
1397 if (bs->io_limits_enabled) {
1398 bdrv_io_limits_intercept(bs, true, nb_sectors);
1399 }
1400
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001401 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1402
Kevin Wolfda1fa912011-07-14 17:27:13 +02001403 if (bs->dirty_bitmap) {
1404 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1405 }
1406
1407 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1408 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1409 }
1410
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001411 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001412}
1413
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001414int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1415 int nb_sectors, QEMUIOVector *qiov)
1416{
1417 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1418
1419 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1420}
1421
bellard83f64092006-08-01 16:21:11 +00001422/**
bellard83f64092006-08-01 16:21:11 +00001423 * Truncate file to 'offset' bytes (needed only for file protocols)
1424 */
1425int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1426{
1427 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001428 int ret;
bellard83f64092006-08-01 16:21:11 +00001429 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001430 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001431 if (!drv->bdrv_truncate)
1432 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001433 if (bs->read_only)
1434 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001435 if (bdrv_in_use(bs))
1436 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001437 ret = drv->bdrv_truncate(bs, offset);
1438 if (ret == 0) {
1439 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001440 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001441 }
1442 return ret;
bellard83f64092006-08-01 16:21:11 +00001443}
1444
1445/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001446 * Length of a allocated file in bytes. Sparse files are counted by actual
1447 * allocated space. Return < 0 if error or unknown.
1448 */
1449int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1450{
1451 BlockDriver *drv = bs->drv;
1452 if (!drv) {
1453 return -ENOMEDIUM;
1454 }
1455 if (drv->bdrv_get_allocated_file_size) {
1456 return drv->bdrv_get_allocated_file_size(bs);
1457 }
1458 if (bs->file) {
1459 return bdrv_get_allocated_file_size(bs->file);
1460 }
1461 return -ENOTSUP;
1462}
1463
1464/**
bellard83f64092006-08-01 16:21:11 +00001465 * Length of a file in bytes. Return < 0 if error or unknown.
1466 */
1467int64_t bdrv_getlength(BlockDriverState *bs)
1468{
1469 BlockDriver *drv = bs->drv;
1470 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001471 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001472
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001473 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001474 if (drv->bdrv_getlength) {
1475 return drv->bdrv_getlength(bs);
1476 }
bellard83f64092006-08-01 16:21:11 +00001477 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001478 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001479}
1480
bellard19cb3732006-08-19 11:45:59 +00001481/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001482void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001483{
bellard19cb3732006-08-19 11:45:59 +00001484 int64_t length;
1485 length = bdrv_getlength(bs);
1486 if (length < 0)
1487 length = 0;
1488 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001489 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001490 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001491}
bellardcf989512004-02-16 21:56:36 +00001492
/* One entry of the MSDOS partition table, as laid out on disk */
struct partition {
        uint8_t boot_ind;       /* 0x80 - active */
        uint8_t head;           /* starting head */
        uint8_t sector;         /* starting sector */
        uint8_t cyl;            /* starting cylinder */
        uint8_t sys_ind;        /* partition type */
        uint8_t end_head;       /* end head */
        uint8_t end_sector;     /* end sector */
        uint8_t end_cyl;        /* end cylinder */
        uint32_t start_sect;    /* starting sector counting from 0 */
        uint32_t nr_sects;      /* number of sectors in partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001505
1506/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1507static int guess_disk_lchs(BlockDriverState *bs,
1508 int *pcylinders, int *pheads, int *psectors)
1509{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001510 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001511 int ret, i, heads, sectors, cylinders;
1512 struct partition *p;
1513 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001514 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001515
1516 bdrv_get_geometry(bs, &nb_sectors);
1517
1518 ret = bdrv_read(bs, 0, buf, 1);
1519 if (ret < 0)
1520 return -1;
1521 /* test msdos magic */
1522 if (buf[510] != 0x55 || buf[511] != 0xaa)
1523 return -1;
1524 for(i = 0; i < 4; i++) {
1525 p = ((struct partition *)(buf + 0x1be)) + i;
1526 nr_sects = le32_to_cpu(p->nr_sects);
1527 if (nr_sects && p->end_head) {
1528 /* We make the assumption that the partition terminates on
1529 a cylinder boundary */
1530 heads = p->end_head + 1;
1531 sectors = p->end_sector & 63;
1532 if (sectors == 0)
1533 continue;
1534 cylinders = nb_sectors / (heads * sectors);
1535 if (cylinders < 1 || cylinders > 16383)
1536 continue;
1537 *pheads = heads;
1538 *psectors = sectors;
1539 *pcylinders = cylinders;
1540#if 0
1541 printf("guessed geometry: LCHS=%d %d %d\n",
1542 cylinders, heads, sectors);
1543#endif
1544 return 0;
1545 }
1546 }
1547 return -1;
1548}
1549
1550void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1551{
1552 int translation, lba_detected = 0;
1553 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00001554 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001555
1556 /* if a geometry hint is available, use it */
1557 bdrv_get_geometry(bs, &nb_sectors);
1558 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1559 translation = bdrv_get_translation_hint(bs);
1560 if (cylinders != 0) {
1561 *pcyls = cylinders;
1562 *pheads = heads;
1563 *psecs = secs;
1564 } else {
1565 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1566 if (heads > 16) {
1567 /* if heads > 16, it means that a BIOS LBA
1568 translation was active, so the default
1569 hardware geometry is OK */
1570 lba_detected = 1;
1571 goto default_geometry;
1572 } else {
1573 *pcyls = cylinders;
1574 *pheads = heads;
1575 *psecs = secs;
1576 /* disable any translation to be in sync with
1577 the logical geometry */
1578 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1579 bdrv_set_translation_hint(bs,
1580 BIOS_ATA_TRANSLATION_NONE);
1581 }
1582 }
1583 } else {
1584 default_geometry:
1585 /* if no geometry, use a standard physical disk geometry */
1586 cylinders = nb_sectors / (16 * 63);
1587
1588 if (cylinders > 16383)
1589 cylinders = 16383;
1590 else if (cylinders < 2)
1591 cylinders = 2;
1592 *pcyls = cylinders;
1593 *pheads = 16;
1594 *psecs = 63;
1595 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1596 if ((*pcyls * *pheads) <= 131072) {
1597 bdrv_set_translation_hint(bs,
1598 BIOS_ATA_TRANSLATION_LARGE);
1599 } else {
1600 bdrv_set_translation_hint(bs,
1601 BIOS_ATA_TRANSLATION_LBA);
1602 }
1603 }
1604 }
1605 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1606 }
1607}
1608
ths5fafdf22007-09-16 21:08:06 +00001609void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001610 int cyls, int heads, int secs)
1611{
1612 bs->cyls = cyls;
1613 bs->heads = heads;
1614 bs->secs = secs;
1615}
1616
bellard46d47672004-11-16 01:45:27 +00001617void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1618{
1619 bs->translation = translation;
1620}
1621
ths5fafdf22007-09-16 21:08:06 +00001622void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001623 int *pcyls, int *pheads, int *psecs)
1624{
1625 *pcyls = bs->cyls;
1626 *pheads = bs->heads;
1627 *psecs = bs->secs;
1628}
1629
Zhi Yong Wu0563e192011-11-03 16:57:25 +08001630/* throttling disk io limits */
1631void bdrv_set_io_limits(BlockDriverState *bs,
1632 BlockIOLimit *io_limits)
1633{
1634 bs->io_limits = *io_limits;
1635 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1636}
1637
Blue Swirl5bbdbb42011-02-12 20:43:32 +00001638/* Recognize floppy formats */
1639typedef struct FDFormat {
1640 FDriveType drive;
1641 uint8_t last_sect;
1642 uint8_t max_track;
1643 uint8_t max_head;
1644} FDFormat;
1645
1646static const FDFormat fd_formats[] = {
1647 /* First entry is default format */
1648 /* 1.44 MB 3"1/2 floppy disks */
1649 { FDRIVE_DRV_144, 18, 80, 1, },
1650 { FDRIVE_DRV_144, 20, 80, 1, },
1651 { FDRIVE_DRV_144, 21, 80, 1, },
1652 { FDRIVE_DRV_144, 21, 82, 1, },
1653 { FDRIVE_DRV_144, 21, 83, 1, },
1654 { FDRIVE_DRV_144, 22, 80, 1, },
1655 { FDRIVE_DRV_144, 23, 80, 1, },
1656 { FDRIVE_DRV_144, 24, 80, 1, },
1657 /* 2.88 MB 3"1/2 floppy disks */
1658 { FDRIVE_DRV_288, 36, 80, 1, },
1659 { FDRIVE_DRV_288, 39, 80, 1, },
1660 { FDRIVE_DRV_288, 40, 80, 1, },
1661 { FDRIVE_DRV_288, 44, 80, 1, },
1662 { FDRIVE_DRV_288, 48, 80, 1, },
1663 /* 720 kB 3"1/2 floppy disks */
1664 { FDRIVE_DRV_144, 9, 80, 1, },
1665 { FDRIVE_DRV_144, 10, 80, 1, },
1666 { FDRIVE_DRV_144, 10, 82, 1, },
1667 { FDRIVE_DRV_144, 10, 83, 1, },
1668 { FDRIVE_DRV_144, 13, 80, 1, },
1669 { FDRIVE_DRV_144, 14, 80, 1, },
1670 /* 1.2 MB 5"1/4 floppy disks */
1671 { FDRIVE_DRV_120, 15, 80, 1, },
1672 { FDRIVE_DRV_120, 18, 80, 1, },
1673 { FDRIVE_DRV_120, 18, 82, 1, },
1674 { FDRIVE_DRV_120, 18, 83, 1, },
1675 { FDRIVE_DRV_120, 20, 80, 1, },
1676 /* 720 kB 5"1/4 floppy disks */
1677 { FDRIVE_DRV_120, 9, 80, 1, },
1678 { FDRIVE_DRV_120, 11, 80, 1, },
1679 /* 360 kB 5"1/4 floppy disks */
1680 { FDRIVE_DRV_120, 9, 40, 1, },
1681 { FDRIVE_DRV_120, 9, 40, 0, },
1682 { FDRIVE_DRV_120, 10, 41, 1, },
1683 { FDRIVE_DRV_120, 10, 42, 1, },
1684 /* 320 kB 5"1/4 floppy disks */
1685 { FDRIVE_DRV_120, 8, 40, 1, },
1686 { FDRIVE_DRV_120, 8, 40, 0, },
1687 /* 360 kB must match 5"1/4 better than 3"1/2... */
1688 { FDRIVE_DRV_144, 9, 80, 0, },
1689 /* end */
1690 { FDRIVE_DRV_NONE, -1, -1, 0, },
1691};
1692
1693void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1694 int *max_track, int *last_sect,
1695 FDriveType drive_in, FDriveType *drive)
1696{
1697 const FDFormat *parse;
1698 uint64_t nb_sectors, size;
1699 int i, first_match, match;
1700
1701 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1702 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1703 /* User defined disk */
1704 } else {
1705 bdrv_get_geometry(bs, &nb_sectors);
1706 match = -1;
1707 first_match = -1;
1708 for (i = 0; ; i++) {
1709 parse = &fd_formats[i];
1710 if (parse->drive == FDRIVE_DRV_NONE) {
1711 break;
1712 }
1713 if (drive_in == parse->drive ||
1714 drive_in == FDRIVE_DRV_NONE) {
1715 size = (parse->max_head + 1) * parse->max_track *
1716 parse->last_sect;
1717 if (nb_sectors == size) {
1718 match = i;
1719 break;
1720 }
1721 if (first_match == -1) {
1722 first_match = i;
1723 }
1724 }
1725 }
1726 if (match == -1) {
1727 if (first_match == -1) {
1728 match = 1;
1729 } else {
1730 match = first_match;
1731 }
1732 parse = &fd_formats[match];
1733 }
1734 *nb_heads = parse->max_head + 1;
1735 *max_track = parse->max_track;
1736 *last_sect = parse->last_sect;
1737 *drive = parse->drive;
1738 }
1739}
1740
bellard46d47672004-11-16 01:45:27 +00001741int bdrv_get_translation_hint(BlockDriverState *bs)
1742{
1743 return bs->translation;
1744}
1745
Markus Armbrusterabd7f682010-06-02 18:55:17 +02001746void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1747 BlockErrorAction on_write_error)
1748{
1749 bs->on_read_error = on_read_error;
1750 bs->on_write_error = on_write_error;
1751}
1752
1753BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1754{
1755 return is_read ? bs->on_read_error : bs->on_write_error;
1756}
1757
bellardb3380822004-03-14 21:38:54 +00001758int bdrv_is_read_only(BlockDriverState *bs)
1759{
1760 return bs->read_only;
1761}
1762
ths985a03b2007-12-24 16:10:43 +00001763int bdrv_is_sg(BlockDriverState *bs)
1764{
1765 return bs->sg;
1766}
1767
Christoph Hellwige900a7b2009-09-04 19:01:15 +02001768int bdrv_enable_write_cache(BlockDriverState *bs)
1769{
1770 return bs->enable_write_cache;
1771}
1772
bellardea2384d2004-08-01 21:59:26 +00001773int bdrv_is_encrypted(BlockDriverState *bs)
1774{
1775 if (bs->backing_hd && bs->backing_hd->encrypted)
1776 return 1;
1777 return bs->encrypted;
1778}
1779
aliguoric0f4ce72009-03-05 23:01:01 +00001780int bdrv_key_required(BlockDriverState *bs)
1781{
1782 BlockDriverState *backing_hd = bs->backing_hd;
1783
1784 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1785 return 1;
1786 return (bs->encrypted && !bs->valid_key);
1787}
1788
bellardea2384d2004-08-01 21:59:26 +00001789int bdrv_set_key(BlockDriverState *bs, const char *key)
1790{
1791 int ret;
1792 if (bs->backing_hd && bs->backing_hd->encrypted) {
1793 ret = bdrv_set_key(bs->backing_hd, key);
1794 if (ret < 0)
1795 return ret;
1796 if (!bs->encrypted)
1797 return 0;
1798 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02001799 if (!bs->encrypted) {
1800 return -EINVAL;
1801 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1802 return -ENOMEDIUM;
1803 }
aliguoric0f4ce72009-03-05 23:01:01 +00001804 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00001805 if (ret < 0) {
1806 bs->valid_key = 0;
1807 } else if (!bs->valid_key) {
1808 bs->valid_key = 1;
1809 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001810 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00001811 }
aliguoric0f4ce72009-03-05 23:01:01 +00001812 return ret;
bellardea2384d2004-08-01 21:59:26 +00001813}
1814
1815void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1816{
bellard19cb3732006-08-19 11:45:59 +00001817 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00001818 buf[0] = '\0';
1819 } else {
1820 pstrcpy(buf, buf_size, bs->drv->format_name);
1821 }
1822}
1823
ths5fafdf22007-09-16 21:08:06 +00001824void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00001825 void *opaque)
1826{
1827 BlockDriver *drv;
1828
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01001829 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00001830 it(opaque, drv->format_name);
1831 }
1832}
1833
bellardb3380822004-03-14 21:38:54 +00001834BlockDriverState *bdrv_find(const char *name)
1835{
1836 BlockDriverState *bs;
1837
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001838 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1839 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00001840 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001841 }
bellardb3380822004-03-14 21:38:54 +00001842 }
1843 return NULL;
1844}
1845
Markus Armbruster2f399b02010-06-02 18:55:20 +02001846BlockDriverState *bdrv_next(BlockDriverState *bs)
1847{
1848 if (!bs) {
1849 return QTAILQ_FIRST(&bdrv_states);
1850 }
1851 return QTAILQ_NEXT(bs, list);
1852}
1853
aliguori51de9762009-03-05 23:00:43 +00001854void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00001855{
1856 BlockDriverState *bs;
1857
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001858 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00001859 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00001860 }
1861}
1862
bellardea2384d2004-08-01 21:59:26 +00001863const char *bdrv_get_device_name(BlockDriverState *bs)
1864{
1865 return bs->device_name;
1866}
1867
aliguoric6ca28d2008-10-06 13:55:43 +00001868void bdrv_flush_all(void)
1869{
1870 BlockDriverState *bs;
1871
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001872 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Markus Armbrusterc602a482011-08-03 15:08:10 +02001873 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
aliguoric6ca28d2008-10-06 13:55:43 +00001874 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001875 }
1876 }
aliguoric6ca28d2008-10-06 13:55:43 +00001877}
1878
Kevin Wolff2feebb2010-04-14 17:30:35 +02001879int bdrv_has_zero_init(BlockDriverState *bs)
1880{
1881 assert(bs->drv);
1882
Kevin Wolf336c1c12010-07-28 11:26:29 +02001883 if (bs->drv->bdrv_has_zero_init) {
1884 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02001885 }
1886
1887 return 1;
1888}
1889
thsf58c7b32008-06-05 21:53:49 +00001890/*
1891 * Returns true iff the specified sector is present in the disk image. Drivers
1892 * not implementing the functionality are assumed to not support backing files,
1893 * hence all their sectors are reported as allocated.
1894 *
1895 * 'pnum' is set to the number of sectors (including and immediately following
1896 * the specified sector) that are known to be in the same
1897 * allocated/unallocated state.
1898 *
1899 * 'nb_sectors' is the max value 'pnum' should be set to.
1900 */
1901int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1902 int *pnum)
1903{
1904 int64_t n;
1905 if (!bs->drv->bdrv_is_allocated) {
1906 if (sector_num >= bs->total_sectors) {
1907 *pnum = 0;
1908 return 0;
1909 }
1910 n = bs->total_sectors - sector_num;
1911 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1912 return 1;
1913 }
1914 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1915}
1916
Luiz Capitulino2582bfe2010-02-03 12:41:01 -02001917void bdrv_mon_event(const BlockDriverState *bdrv,
1918 BlockMonEventAction action, int is_read)
1919{
1920 QObject *data;
1921 const char *action_str;
1922
1923 switch (action) {
1924 case BDRV_ACTION_REPORT:
1925 action_str = "report";
1926 break;
1927 case BDRV_ACTION_IGNORE:
1928 action_str = "ignore";
1929 break;
1930 case BDRV_ACTION_STOP:
1931 action_str = "stop";
1932 break;
1933 default:
1934 abort();
1935 }
1936
1937 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1938 bdrv->device_name,
1939 action_str,
1940 is_read ? "read" : "write");
1941 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1942
1943 qobject_decref(data);
1944}
1945
Luiz Capitulinob2023812011-09-21 17:16:47 -03001946BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00001947{
Luiz Capitulinob2023812011-09-21 17:16:47 -03001948 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00001949 BlockDriverState *bs;
1950
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001951 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03001952 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02001953
Luiz Capitulinob2023812011-09-21 17:16:47 -03001954 info->value = g_malloc0(sizeof(*info->value));
1955 info->value->device = g_strdup(bs->device_name);
1956 info->value->type = g_strdup("unknown");
1957 info->value->locked = bdrv_dev_is_medium_locked(bs);
1958 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02001959
Markus Armbrustere4def802011-09-06 18:58:53 +02001960 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03001961 info->value->has_tray_open = true;
1962 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02001963 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03001964
1965 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03001966 info->value->has_io_status = true;
1967 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03001968 }
1969
bellard19cb3732006-08-19 11:45:59 +00001970 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03001971 info->value->has_inserted = true;
1972 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
1973 info->value->inserted->file = g_strdup(bs->filename);
1974 info->value->inserted->ro = bs->read_only;
1975 info->value->inserted->drv = g_strdup(bs->drv->format_name);
1976 info->value->inserted->encrypted = bs->encrypted;
1977 if (bs->backing_file[0]) {
1978 info->value->inserted->has_backing_file = true;
1979 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00001980 }
bellardb3380822004-03-14 21:38:54 +00001981 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03001982
1983 /* XXX: waiting for the qapi to support GSList */
1984 if (!cur_item) {
1985 head = cur_item = info;
1986 } else {
1987 cur_item->next = info;
1988 cur_item = info;
1989 }
bellardb3380822004-03-14 21:38:54 +00001990 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02001991
Luiz Capitulinob2023812011-09-21 17:16:47 -03001992 return head;
bellardb3380822004-03-14 21:38:54 +00001993}
thsa36e69d2007-12-02 05:18:19 +00001994
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03001995/* Consider exposing this as a full fledged QMP command */
1996static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00001997{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03001998 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02001999
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002000 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002001
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002002 if (bs->device_name[0]) {
2003 s->has_device = true;
2004 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002005 }
2006
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002007 s->stats = g_malloc0(sizeof(*s->stats));
2008 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2009 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2010 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2011 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2012 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2013 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2014 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2015 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2016 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2017
Kevin Wolf294cc352010-04-28 14:34:01 +02002018 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002019 s->has_parent = true;
2020 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002021 }
2022
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002023 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002024}
2025
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002026BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002027{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002028 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002029 BlockDriverState *bs;
2030
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002031 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002032 BlockStatsList *info = g_malloc0(sizeof(*info));
2033 info->value = qmp_query_blockstat(bs, NULL);
2034
2035 /* XXX: waiting for the qapi to support GSList */
2036 if (!cur_item) {
2037 head = cur_item = info;
2038 } else {
2039 cur_item->next = info;
2040 cur_item = info;
2041 }
thsa36e69d2007-12-02 05:18:19 +00002042 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002043
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002044 return head;
thsa36e69d2007-12-02 05:18:19 +00002045}
bellardea2384d2004-08-01 21:59:26 +00002046
aliguori045df332009-03-05 23:00:48 +00002047const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2048{
2049 if (bs->backing_hd && bs->backing_hd->encrypted)
2050 return bs->backing_file;
2051 else if (bs->encrypted)
2052 return bs->filename;
2053 else
2054 return NULL;
2055}
2056
ths5fafdf22007-09-16 21:08:06 +00002057void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002058 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002059{
Kevin Wolf3574c602011-10-26 11:02:11 +02002060 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002061}
2062
ths5fafdf22007-09-16 21:08:06 +00002063int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002064 const uint8_t *buf, int nb_sectors)
2065{
2066 BlockDriver *drv = bs->drv;
2067 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002068 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002069 if (!drv->bdrv_write_compressed)
2070 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002071 if (bdrv_check_request(bs, sector_num, nb_sectors))
2072 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002073
Jan Kiszkac6d22832009-11-30 18:21:20 +01002074 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002075 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2076 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002077
bellardfaea38e2006-08-05 21:31:00 +00002078 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2079}
ths3b46e622007-09-17 08:09:54 +00002080
bellardfaea38e2006-08-05 21:31:00 +00002081int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2082{
2083 BlockDriver *drv = bs->drv;
2084 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002085 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002086 if (!drv->bdrv_get_info)
2087 return -ENOTSUP;
2088 memset(bdi, 0, sizeof(*bdi));
2089 return drv->bdrv_get_info(bs, bdi);
2090}
2091
Christoph Hellwig45566e92009-07-10 23:11:57 +02002092int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2093 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002094{
2095 BlockDriver *drv = bs->drv;
2096 if (!drv)
2097 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002098 if (drv->bdrv_save_vmstate)
2099 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2100 if (bs->file)
2101 return bdrv_save_vmstate(bs->file, buf, pos, size);
2102 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002103}
2104
Christoph Hellwig45566e92009-07-10 23:11:57 +02002105int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2106 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002107{
2108 BlockDriver *drv = bs->drv;
2109 if (!drv)
2110 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002111 if (drv->bdrv_load_vmstate)
2112 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2113 if (bs->file)
2114 return bdrv_load_vmstate(bs->file, buf, pos, size);
2115 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002116}
2117
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002118void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2119{
2120 BlockDriver *drv = bs->drv;
2121
2122 if (!drv || !drv->bdrv_debug_event) {
2123 return;
2124 }
2125
2126 return drv->bdrv_debug_event(bs, event);
2127
2128}
2129
bellardfaea38e2006-08-05 21:31:00 +00002130/**************************************************************/
2131/* handling of snapshots */
2132
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002133int bdrv_can_snapshot(BlockDriverState *bs)
2134{
2135 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002136 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002137 return 0;
2138 }
2139
2140 if (!drv->bdrv_snapshot_create) {
2141 if (bs->file != NULL) {
2142 return bdrv_can_snapshot(bs->file);
2143 }
2144 return 0;
2145 }
2146
2147 return 1;
2148}
2149
Blue Swirl199630b2010-07-25 20:49:34 +00002150int bdrv_is_snapshot(BlockDriverState *bs)
2151{
2152 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2153}
2154
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002155BlockDriverState *bdrv_snapshots(void)
2156{
2157 BlockDriverState *bs;
2158
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002159 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002160 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002161 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002162
2163 bs = NULL;
2164 while ((bs = bdrv_next(bs))) {
2165 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002166 bs_snapshots = bs;
2167 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002168 }
2169 }
2170 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002171}
2172
ths5fafdf22007-09-16 21:08:06 +00002173int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002174 QEMUSnapshotInfo *sn_info)
2175{
2176 BlockDriver *drv = bs->drv;
2177 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002178 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002179 if (drv->bdrv_snapshot_create)
2180 return drv->bdrv_snapshot_create(bs, sn_info);
2181 if (bs->file)
2182 return bdrv_snapshot_create(bs->file, sn_info);
2183 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002184}
2185
ths5fafdf22007-09-16 21:08:06 +00002186int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002187 const char *snapshot_id)
2188{
2189 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002190 int ret, open_ret;
2191
bellardfaea38e2006-08-05 21:31:00 +00002192 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002193 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002194 if (drv->bdrv_snapshot_goto)
2195 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2196
2197 if (bs->file) {
2198 drv->bdrv_close(bs);
2199 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2200 open_ret = drv->bdrv_open(bs, bs->open_flags);
2201 if (open_ret < 0) {
2202 bdrv_delete(bs->file);
2203 bs->drv = NULL;
2204 return open_ret;
2205 }
2206 return ret;
2207 }
2208
2209 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002210}
2211
2212int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2213{
2214 BlockDriver *drv = bs->drv;
2215 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002216 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002217 if (drv->bdrv_snapshot_delete)
2218 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2219 if (bs->file)
2220 return bdrv_snapshot_delete(bs->file, snapshot_id);
2221 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002222}
2223
ths5fafdf22007-09-16 21:08:06 +00002224int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002225 QEMUSnapshotInfo **psn_info)
2226{
2227 BlockDriver *drv = bs->drv;
2228 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002229 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002230 if (drv->bdrv_snapshot_list)
2231 return drv->bdrv_snapshot_list(bs, psn_info);
2232 if (bs->file)
2233 return bdrv_snapshot_list(bs->file, psn_info);
2234 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002235}
2236
edison51ef6722010-09-21 19:58:41 -07002237int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2238 const char *snapshot_name)
2239{
2240 BlockDriver *drv = bs->drv;
2241 if (!drv) {
2242 return -ENOMEDIUM;
2243 }
2244 if (!bs->read_only) {
2245 return -EINVAL;
2246 }
2247 if (drv->bdrv_snapshot_load_tmp) {
2248 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2249 }
2250 return -ENOTSUP;
2251}
2252
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes) in a compact form.
 *
 * Values up to 999 are printed as a plain integer.  Larger values are
 * scaled by powers of 1024 and suffixed with K, M, G or T: one decimal
 * digit while the scaled value is below 10, a rounded integer otherwise.
 * T is the largest unit, so bigger values simply get more digits.
 *
 * Returns @buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* pick the first unit in which the value drops below 1000 (T at most) */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit,
                 suffixes[idx]);
    } else {
        /* add half a unit so the integer division rounds to nearest */
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit,
                 suffixes[idx]);
    }
    return buf;
}
2282
2283char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2284{
2285 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002286#ifdef _WIN32
2287 struct tm *ptm;
2288#else
bellardfaea38e2006-08-05 21:31:00 +00002289 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002290#endif
bellardfaea38e2006-08-05 21:31:00 +00002291 time_t ti;
2292 int64_t secs;
2293
2294 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002295 snprintf(buf, buf_size,
2296 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002297 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2298 } else {
2299 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002300#ifdef _WIN32
2301 ptm = localtime(&ti);
2302 strftime(date_buf, sizeof(date_buf),
2303 "%Y-%m-%d %H:%M:%S", ptm);
2304#else
bellardfaea38e2006-08-05 21:31:00 +00002305 localtime_r(&ti, &tm);
2306 strftime(date_buf, sizeof(date_buf),
2307 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002308#endif
bellardfaea38e2006-08-05 21:31:00 +00002309 secs = sn->vm_clock_nsec / 1000000000;
2310 snprintf(clock_buf, sizeof(clock_buf),
2311 "%02d:%02d:%02d.%03d",
2312 (int)(secs / 3600),
2313 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002314 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002315 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2316 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002317 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002318 sn->id_str, sn->name,
2319 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2320 date_buf,
2321 clock_buf);
2322 }
2323 return buf;
2324}
2325
bellard83f64092006-08-01 16:21:11 +00002326/**************************************************************/
2327/* async I/Os */
2328
aliguori3b69e4b2009-01-22 16:59:24 +00002329BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002330 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002331 BlockDriverCompletionFunc *cb, void *opaque)
2332{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002333 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2334
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002335 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002336 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002337}
2338
aliguorif141eaf2009-04-07 18:43:24 +00002339BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2340 QEMUIOVector *qiov, int nb_sectors,
2341 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002342{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002343 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2344
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002345 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002346 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002347}
2348
Kevin Wolf40b4f532009-09-09 17:53:37 +02002349
2350typedef struct MultiwriteCB {
2351 int error;
2352 int num_requests;
2353 int num_callbacks;
2354 struct {
2355 BlockDriverCompletionFunc *cb;
2356 void *opaque;
2357 QEMUIOVector *free_qiov;
2358 void *free_buf;
2359 } callbacks[];
2360} MultiwriteCB;
2361
2362static void multiwrite_user_cb(MultiwriteCB *mcb)
2363{
2364 int i;
2365
2366 for (i = 0; i < mcb->num_callbacks; i++) {
2367 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002368 if (mcb->callbacks[i].free_qiov) {
2369 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2370 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002371 g_free(mcb->callbacks[i].free_qiov);
Herve Poussineauf8a83242010-01-24 21:23:56 +00002372 qemu_vfree(mcb->callbacks[i].free_buf);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002373 }
2374}
2375
2376static void multiwrite_cb(void *opaque, int ret)
2377{
2378 MultiwriteCB *mcb = opaque;
2379
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002380 trace_multiwrite_cb(mcb, ret);
2381
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002382 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002383 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002384 }
2385
2386 mcb->num_requests--;
2387 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002388 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002389 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002390 }
2391}
2392
2393static int multiwrite_req_compare(const void *a, const void *b)
2394{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002395 const BlockRequest *req1 = a, *req2 = b;
2396
2397 /*
2398 * Note that we can't simply subtract req2->sector from req1->sector
2399 * here as that could overflow the return value.
2400 */
2401 if (req1->sector > req2->sector) {
2402 return 1;
2403 } else if (req1->sector < req2->sector) {
2404 return -1;
2405 } else {
2406 return 0;
2407 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002408}
2409
2410/*
2411 * Takes a bunch of requests and tries to merge them. Returns the number of
2412 * requests that remain after merging.
2413 */
2414static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2415 int num_reqs, MultiwriteCB *mcb)
2416{
2417 int i, outidx;
2418
2419 // Sort requests by start sector
2420 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2421
2422 // Check if adjacent requests touch the same clusters. If so, combine them,
2423 // filling up gaps with zero sectors.
2424 outidx = 0;
2425 for (i = 1; i < num_reqs; i++) {
2426 int merge = 0;
2427 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2428
2429 // This handles the cases that are valid for all block drivers, namely
2430 // exactly sequential writes and overlapping writes.
2431 if (reqs[i].sector <= oldreq_last) {
2432 merge = 1;
2433 }
2434
2435 // The block driver may decide that it makes sense to combine requests
2436 // even if there is a gap of some sectors between them. In this case,
2437 // the gap is filled with zeros (therefore only applicable for yet
2438 // unused space in format like qcow2).
2439 if (!merge && bs->drv->bdrv_merge_requests) {
2440 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2441 }
2442
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002443 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2444 merge = 0;
2445 }
2446
Kevin Wolf40b4f532009-09-09 17:53:37 +02002447 if (merge) {
2448 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002449 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002450 qemu_iovec_init(qiov,
2451 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2452
2453 // Add the first request to the merged one. If the requests are
2454 // overlapping, drop the last sectors of the first request.
2455 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2456 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2457
2458 // We might need to add some zeros between the two requests
2459 if (reqs[i].sector > oldreq_last) {
2460 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2461 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2462 memset(buf, 0, zero_bytes);
2463 qemu_iovec_add(qiov, buf, zero_bytes);
2464 mcb->callbacks[i].free_buf = buf;
2465 }
2466
2467 // Add the second request
2468 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2469
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002470 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002471 reqs[outidx].qiov = qiov;
2472
2473 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2474 } else {
2475 outidx++;
2476 reqs[outidx].sector = reqs[i].sector;
2477 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2478 reqs[outidx].qiov = reqs[i].qiov;
2479 }
2480 }
2481
2482 return outidx + 1;
2483}
2484
2485/*
2486 * Submit multiple AIO write requests at once.
2487 *
2488 * On success, the function returns 0 and all requests in the reqs array have
2489 * been submitted. In error case this function returns -1, and any of the
2490 * requests may or may not be submitted yet. In particular, this means that the
2491 * callback will be called for some of the requests, for others it won't. The
2492 * caller must check the error field of the BlockRequest to wait for the right
2493 * callbacks (if error != 0, no callback will be called).
2494 *
2495 * The implementation may modify the contents of the reqs array, e.g. to merge
2496 * requests. However, the fields opaque and error are left unmodified as they
2497 * are used to signal failure for a single request to the caller.
2498 */
2499int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2500{
2501 BlockDriverAIOCB *acb;
2502 MultiwriteCB *mcb;
2503 int i;
2504
Ryan Harper301db7c2011-03-07 10:01:04 -06002505 /* don't submit writes if we don't have a medium */
2506 if (bs->drv == NULL) {
2507 for (i = 0; i < num_reqs; i++) {
2508 reqs[i].error = -ENOMEDIUM;
2509 }
2510 return -1;
2511 }
2512
Kevin Wolf40b4f532009-09-09 17:53:37 +02002513 if (num_reqs == 0) {
2514 return 0;
2515 }
2516
2517 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05002518 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002519 mcb->num_requests = 0;
2520 mcb->num_callbacks = num_reqs;
2521
2522 for (i = 0; i < num_reqs; i++) {
2523 mcb->callbacks[i].cb = reqs[i].cb;
2524 mcb->callbacks[i].opaque = reqs[i].opaque;
2525 }
2526
2527 // Check for mergable requests
2528 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2529
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002530 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2531
Kevin Wolf453f9a12010-07-02 14:01:21 +02002532 /*
2533 * Run the aio requests. As soon as one request can't be submitted
2534 * successfully, fail all requests that are not yet submitted (we must
2535 * return failure for all requests anyway)
2536 *
2537 * num_requests cannot be set to the right value immediately: If
2538 * bdrv_aio_writev fails for some request, num_requests would be too high
2539 * and therefore multiwrite_cb() would never recognize the multiwrite
2540 * request as completed. We also cannot use the loop variable i to set it
2541 * when the first request fails because the callback may already have been
2542 * called for previously submitted requests. Thus, num_requests must be
2543 * incremented for each request that is submitted.
2544 *
2545 * The problem that callbacks may be called early also means that we need
2546 * to take care that num_requests doesn't become 0 before all requests are
2547 * submitted - multiwrite_cb() would consider the multiwrite request
2548 * completed. A dummy request that is "completed" by a manual call to
2549 * multiwrite_cb() takes care of this.
2550 */
2551 mcb->num_requests = 1;
2552
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002553 // Run the aio requests
Kevin Wolf40b4f532009-09-09 17:53:37 +02002554 for (i = 0; i < num_reqs; i++) {
Kevin Wolf453f9a12010-07-02 14:01:21 +02002555 mcb->num_requests++;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002556 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2557 reqs[i].nb_sectors, multiwrite_cb, mcb);
2558
2559 if (acb == NULL) {
2560 // We can only fail the whole thing if no request has been
2561 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2562 // complete and report the error in the callback.
Kevin Wolf453f9a12010-07-02 14:01:21 +02002563 if (i == 0) {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002564 trace_bdrv_aio_multiwrite_earlyfail(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002565 goto fail;
2566 } else {
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002567 trace_bdrv_aio_multiwrite_latefail(mcb, i);
Kevin Wolf7eb58a62010-04-06 18:24:07 +02002568 multiwrite_cb(mcb, -EIO);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002569 break;
2570 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002571 }
2572 }
2573
Kevin Wolf453f9a12010-07-02 14:01:21 +02002574 /* Complete the dummy request */
2575 multiwrite_cb(mcb, 0);
2576
Kevin Wolf40b4f532009-09-09 17:53:37 +02002577 return 0;
2578
2579fail:
Kevin Wolf453f9a12010-07-02 14:01:21 +02002580 for (i = 0; i < mcb->num_callbacks; i++) {
2581 reqs[i].error = -EIO;
2582 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002583 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002584 return -1;
2585}
2586
bellard83f64092006-08-01 16:21:11 +00002587void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00002588{
aliguori6bbff9a2009-03-20 18:25:59 +00002589 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00002590}
2591
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002592/* block I/O throttling */
2593static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2594 bool is_write, double elapsed_time, uint64_t *wait)
2595{
2596 uint64_t bps_limit = 0;
2597 double bytes_limit, bytes_base, bytes_res;
2598 double slice_time, wait_time;
2599
2600 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2601 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2602 } else if (bs->io_limits.bps[is_write]) {
2603 bps_limit = bs->io_limits.bps[is_write];
2604 } else {
2605 if (wait) {
2606 *wait = 0;
2607 }
2608
2609 return false;
2610 }
2611
2612 slice_time = bs->slice_end - bs->slice_start;
2613 slice_time /= (NANOSECONDS_PER_SECOND);
2614 bytes_limit = bps_limit * slice_time;
2615 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2616 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2617 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2618 }
2619
2620 /* bytes_base: the bytes of data which have been read/written; and
2621 * it is obtained from the history statistic info.
2622 * bytes_res: the remaining bytes of data which need to be read/written.
2623 * (bytes_base + bytes_res) / bps_limit: used to calcuate
2624 * the total time for completing reading/writting all data.
2625 */
2626 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2627
2628 if (bytes_base + bytes_res <= bytes_limit) {
2629 if (wait) {
2630 *wait = 0;
2631 }
2632
2633 return false;
2634 }
2635
2636 /* Calc approx time to dispatch */
2637 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2638
2639 /* When the I/O rate at runtime exceeds the limits,
2640 * bs->slice_end need to be extended in order that the current statistic
2641 * info can be kept until the timer fire, so it is increased and tuned
2642 * based on the result of experiment.
2643 */
2644 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2645 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2646 if (wait) {
2647 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2648 }
2649
2650 return true;
2651}
2652
2653static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
2654 double elapsed_time, uint64_t *wait)
2655{
2656 uint64_t iops_limit = 0;
2657 double ios_limit, ios_base;
2658 double slice_time, wait_time;
2659
2660 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2661 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2662 } else if (bs->io_limits.iops[is_write]) {
2663 iops_limit = bs->io_limits.iops[is_write];
2664 } else {
2665 if (wait) {
2666 *wait = 0;
2667 }
2668
2669 return false;
2670 }
2671
2672 slice_time = bs->slice_end - bs->slice_start;
2673 slice_time /= (NANOSECONDS_PER_SECOND);
2674 ios_limit = iops_limit * slice_time;
2675 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
2676 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
2677 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
2678 }
2679
2680 if (ios_base + 1 <= ios_limit) {
2681 if (wait) {
2682 *wait = 0;
2683 }
2684
2685 return false;
2686 }
2687
2688 /* Calc approx time to dispatch */
2689 wait_time = (ios_base + 1) / iops_limit;
2690 if (wait_time > elapsed_time) {
2691 wait_time = wait_time - elapsed_time;
2692 } else {
2693 wait_time = 0;
2694 }
2695
2696 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
2697 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
2698 if (wait) {
2699 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
2700 }
2701
2702 return true;
2703}
2704
2705static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
2706 bool is_write, int64_t *wait)
2707{
2708 int64_t now, max_wait;
2709 uint64_t bps_wait = 0, iops_wait = 0;
2710 double elapsed_time;
2711 int bps_ret, iops_ret;
2712
2713 now = qemu_get_clock_ns(vm_clock);
2714 if ((bs->slice_start < now)
2715 && (bs->slice_end > now)) {
2716 bs->slice_end = now + bs->slice_time;
2717 } else {
2718 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
2719 bs->slice_start = now;
2720 bs->slice_end = now + bs->slice_time;
2721
2722 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
2723 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
2724
2725 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
2726 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
2727 }
2728
2729 elapsed_time = now - bs->slice_start;
2730 elapsed_time /= (NANOSECONDS_PER_SECOND);
2731
2732 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
2733 is_write, elapsed_time, &bps_wait);
2734 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
2735 elapsed_time, &iops_wait);
2736 if (bps_ret || iops_ret) {
2737 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
2738 if (wait) {
2739 *wait = max_wait;
2740 }
2741
2742 now = qemu_get_clock_ns(vm_clock);
2743 if (bs->slice_end < now + max_wait) {
2744 bs->slice_end = now + max_wait;
2745 }
2746
2747 return true;
2748 }
2749
2750 if (wait) {
2751 *wait = 0;
2752 }
2753
2754 return false;
2755}
pbrookce1a14d2006-08-07 02:38:06 +00002756
bellard83f64092006-08-01 16:21:11 +00002757/**************************************************************/
2758/* async block device emulation */
2759
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02002760typedef struct BlockDriverAIOCBSync {
2761 BlockDriverAIOCB common;
2762 QEMUBH *bh;
2763 int ret;
2764 /* vector translation state */
2765 QEMUIOVector *qiov;
2766 uint8_t *bounce;
2767 int is_write;
2768} BlockDriverAIOCBSync;
2769
2770static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2771{
Kevin Wolfb666d232010-05-05 11:44:39 +02002772 BlockDriverAIOCBSync *acb =
2773 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03002774 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03002775 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02002776 qemu_aio_release(acb);
2777}
2778
2779static AIOPool bdrv_em_aio_pool = {
2780 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2781 .cancel = bdrv_aio_cancel_em,
2782};
2783
bellard83f64092006-08-01 16:21:11 +00002784static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00002785{
pbrookce1a14d2006-08-07 02:38:06 +00002786 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00002787
aliguorif141eaf2009-04-07 18:43:24 +00002788 if (!acb->is_write)
2789 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00002790 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00002791 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03002792 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03002793 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00002794 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00002795}
bellardbeac80c2006-06-26 20:08:57 +00002796
aliguorif141eaf2009-04-07 18:43:24 +00002797static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2798 int64_t sector_num,
2799 QEMUIOVector *qiov,
2800 int nb_sectors,
2801 BlockDriverCompletionFunc *cb,
2802 void *opaque,
2803 int is_write)
2804
bellardea2384d2004-08-01 21:59:26 +00002805{
pbrookce1a14d2006-08-07 02:38:06 +00002806 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00002807
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02002808 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00002809 acb->is_write = is_write;
2810 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00002811 acb->bounce = qemu_blockalign(bs, qiov->size);
aliguorif141eaf2009-04-07 18:43:24 +00002812
pbrookce1a14d2006-08-07 02:38:06 +00002813 if (!acb->bh)
2814 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00002815
2816 if (is_write) {
2817 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01002818 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00002819 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01002820 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00002821 }
2822
pbrookce1a14d2006-08-07 02:38:06 +00002823 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00002824
pbrookce1a14d2006-08-07 02:38:06 +00002825 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00002826}
2827
aliguorif141eaf2009-04-07 18:43:24 +00002828static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2829 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00002830 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002831{
aliguorif141eaf2009-04-07 18:43:24 +00002832 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00002833}
2834
aliguorif141eaf2009-04-07 18:43:24 +00002835static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2836 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2837 BlockDriverCompletionFunc *cb, void *opaque)
2838{
2839 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2840}
2841
Kevin Wolf68485422011-06-30 10:05:46 +02002842
2843typedef struct BlockDriverAIOCBCoroutine {
2844 BlockDriverAIOCB common;
2845 BlockRequest req;
2846 bool is_write;
2847 QEMUBH* bh;
2848} BlockDriverAIOCBCoroutine;
2849
2850static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2851{
2852 qemu_aio_flush();
2853}
2854
2855static AIOPool bdrv_em_co_aio_pool = {
2856 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2857 .cancel = bdrv_aio_co_cancel_em,
2858};
2859
Paolo Bonzini35246a62011-10-14 10:41:29 +02002860static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02002861{
2862 BlockDriverAIOCBCoroutine *acb = opaque;
2863
2864 acb->common.cb(acb->common.opaque, acb->req.error);
2865 qemu_bh_delete(acb->bh);
2866 qemu_aio_release(acb);
2867}
2868
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002869/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2870static void coroutine_fn bdrv_co_do_rw(void *opaque)
2871{
2872 BlockDriverAIOCBCoroutine *acb = opaque;
2873 BlockDriverState *bs = acb->common.bs;
2874
2875 if (!acb->is_write) {
2876 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2877 acb->req.nb_sectors, acb->req.qiov);
2878 } else {
2879 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2880 acb->req.nb_sectors, acb->req.qiov);
2881 }
2882
Paolo Bonzini35246a62011-10-14 10:41:29 +02002883 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002884 qemu_bh_schedule(acb->bh);
2885}
2886
Kevin Wolf68485422011-06-30 10:05:46 +02002887static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2888 int64_t sector_num,
2889 QEMUIOVector *qiov,
2890 int nb_sectors,
2891 BlockDriverCompletionFunc *cb,
2892 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002893 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02002894{
2895 Coroutine *co;
2896 BlockDriverAIOCBCoroutine *acb;
2897
2898 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2899 acb->req.sector = sector_num;
2900 acb->req.nb_sectors = nb_sectors;
2901 acb->req.qiov = qiov;
2902 acb->is_write = is_write;
2903
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002904 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02002905 qemu_coroutine_enter(co, acb);
2906
2907 return &acb->common;
2908}
2909
Paolo Bonzini07f07612011-10-17 12:32:12 +02002910static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02002911{
Paolo Bonzini07f07612011-10-17 12:32:12 +02002912 BlockDriverAIOCBCoroutine *acb = opaque;
2913 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02002914
Paolo Bonzini07f07612011-10-17 12:32:12 +02002915 acb->req.error = bdrv_co_flush(bs);
2916 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02002917 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02002918}
2919
Paolo Bonzini07f07612011-10-17 12:32:12 +02002920BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02002921 BlockDriverCompletionFunc *cb, void *opaque)
2922{
Paolo Bonzini07f07612011-10-17 12:32:12 +02002923 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02002924
Paolo Bonzini07f07612011-10-17 12:32:12 +02002925 Coroutine *co;
2926 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02002927
Paolo Bonzini07f07612011-10-17 12:32:12 +02002928 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2929 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2930 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02002931
Alexander Graf016f5cf2010-05-26 17:51:49 +02002932 return &acb->common;
2933}
2934
Paolo Bonzini4265d622011-10-17 12:32:14 +02002935static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
2936{
2937 BlockDriverAIOCBCoroutine *acb = opaque;
2938 BlockDriverState *bs = acb->common.bs;
2939
2940 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2941 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2942 qemu_bh_schedule(acb->bh);
2943}
2944
2945BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
2946 int64_t sector_num, int nb_sectors,
2947 BlockDriverCompletionFunc *cb, void *opaque)
2948{
2949 Coroutine *co;
2950 BlockDriverAIOCBCoroutine *acb;
2951
2952 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
2953
2954 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2955 acb->req.sector = sector_num;
2956 acb->req.nb_sectors = nb_sectors;
2957 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
2958 qemu_coroutine_enter(co, acb);
2959
2960 return &acb->common;
2961}
2962
bellardea2384d2004-08-01 21:59:26 +00002963void bdrv_init(void)
2964{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05002965 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00002966}
pbrookce1a14d2006-08-07 02:38:06 +00002967
Markus Armbrustereb852012009-10-27 18:41:44 +01002968void bdrv_init_with_whitelist(void)
2969{
2970 use_bdrv_whitelist = 1;
2971 bdrv_init();
2972}
2973
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02002974void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2975 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00002976{
pbrookce1a14d2006-08-07 02:38:06 +00002977 BlockDriverAIOCB *acb;
2978
aliguori6bbff9a2009-03-20 18:25:59 +00002979 if (pool->free_aiocb) {
2980 acb = pool->free_aiocb;
2981 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00002982 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05002983 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00002984 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00002985 }
2986 acb->bs = bs;
2987 acb->cb = cb;
2988 acb->opaque = opaque;
2989 return acb;
2990}
2991
2992void qemu_aio_release(void *p)
2993{
aliguori6bbff9a2009-03-20 18:25:59 +00002994 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2995 AIOPool *pool = acb->pool;
2996 acb->next = pool->free_aiocb;
2997 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00002998}
bellard19cb3732006-08-19 11:45:59 +00002999
3000/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003001/* Coroutine block device emulation */
3002
3003typedef struct CoroutineIOCompletion {
3004 Coroutine *coroutine;
3005 int ret;
3006} CoroutineIOCompletion;
3007
3008static void bdrv_co_io_em_complete(void *opaque, int ret)
3009{
3010 CoroutineIOCompletion *co = opaque;
3011
3012 co->ret = ret;
3013 qemu_coroutine_enter(co->coroutine, NULL);
3014}
3015
3016static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3017 int nb_sectors, QEMUIOVector *iov,
3018 bool is_write)
3019{
3020 CoroutineIOCompletion co = {
3021 .coroutine = qemu_coroutine_self(),
3022 };
3023 BlockDriverAIOCB *acb;
3024
3025 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003026 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3027 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003028 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003029 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3030 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003031 }
3032
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003033 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003034 if (!acb) {
3035 return -EIO;
3036 }
3037 qemu_coroutine_yield();
3038
3039 return co.ret;
3040}
3041
3042static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3043 int64_t sector_num, int nb_sectors,
3044 QEMUIOVector *iov)
3045{
3046 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3047}
3048
3049static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3050 int64_t sector_num, int nb_sectors,
3051 QEMUIOVector *iov)
3052{
3053 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3054}
3055
Paolo Bonzini07f07612011-10-17 12:32:12 +02003056static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003057{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003058 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003059
Paolo Bonzini07f07612011-10-17 12:32:12 +02003060 rwco->ret = bdrv_co_flush(rwco->bs);
3061}
3062
3063int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3064{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003065 int ret;
3066
Kevin Wolfca716362011-11-10 18:13:59 +01003067 if (!bs->drv) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003068 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003069 }
3070
Kevin Wolfca716362011-11-10 18:13:59 +01003071 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003072 if (bs->drv->bdrv_co_flush_to_os) {
3073 ret = bs->drv->bdrv_co_flush_to_os(bs);
3074 if (ret < 0) {
3075 return ret;
3076 }
3077 }
3078
Kevin Wolfca716362011-11-10 18:13:59 +01003079 /* But don't actually force it to the disk with cache=unsafe */
3080 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3081 return 0;
3082 }
3083
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003084 if (bs->drv->bdrv_co_flush_to_disk) {
Kevin Wolfc68b89a2011-11-10 17:25:44 +01003085 return bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003086 } else if (bs->drv->bdrv_aio_flush) {
3087 BlockDriverAIOCB *acb;
3088 CoroutineIOCompletion co = {
3089 .coroutine = qemu_coroutine_self(),
3090 };
3091
3092 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3093 if (acb == NULL) {
3094 return -EIO;
3095 } else {
3096 qemu_coroutine_yield();
3097 return co.ret;
3098 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003099 } else {
3100 /*
3101 * Some block drivers always operate in either writethrough or unsafe
3102 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3103 * know how the server works (because the behaviour is hardcoded or
3104 * depends on server-side configuration), so we can't ensure that
3105 * everything is safe on disk. Returning an error doesn't work because
3106 * that would break guests even if the server operates in writethrough
3107 * mode.
3108 *
3109 * Let's hope the user knows what he's doing.
3110 */
3111 return 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003112 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003113}
3114
Anthony Liguori0f154232011-11-14 15:09:45 -06003115void bdrv_invalidate_cache(BlockDriverState *bs)
3116{
3117 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3118 bs->drv->bdrv_invalidate_cache(bs);
3119 }
3120}
3121
3122void bdrv_invalidate_cache_all(void)
3123{
3124 BlockDriverState *bs;
3125
3126 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3127 bdrv_invalidate_cache(bs);
3128 }
3129}
3130
Paolo Bonzini07f07612011-10-17 12:32:12 +02003131int bdrv_flush(BlockDriverState *bs)
3132{
3133 Coroutine *co;
3134 RwCo rwco = {
3135 .bs = bs,
3136 .ret = NOT_DONE,
3137 };
3138
3139 if (qemu_in_coroutine()) {
3140 /* Fast-path if already in coroutine context */
3141 bdrv_flush_co_entry(&rwco);
3142 } else {
3143 co = qemu_coroutine_create(bdrv_flush_co_entry);
3144 qemu_coroutine_enter(co, &rwco);
3145 while (rwco.ret == NOT_DONE) {
3146 qemu_aio_wait();
3147 }
3148 }
3149
3150 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003151}
3152
Paolo Bonzini4265d622011-10-17 12:32:14 +02003153static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3154{
3155 RwCo *rwco = opaque;
3156
3157 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3158}
3159
3160int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3161 int nb_sectors)
3162{
3163 if (!bs->drv) {
3164 return -ENOMEDIUM;
3165 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3166 return -EIO;
3167 } else if (bs->read_only) {
3168 return -EROFS;
3169 } else if (bs->drv->bdrv_co_discard) {
3170 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3171 } else if (bs->drv->bdrv_aio_discard) {
3172 BlockDriverAIOCB *acb;
3173 CoroutineIOCompletion co = {
3174 .coroutine = qemu_coroutine_self(),
3175 };
3176
3177 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3178 bdrv_co_io_em_complete, &co);
3179 if (acb == NULL) {
3180 return -EIO;
3181 } else {
3182 qemu_coroutine_yield();
3183 return co.ret;
3184 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003185 } else {
3186 return 0;
3187 }
3188}
3189
3190int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3191{
3192 Coroutine *co;
3193 RwCo rwco = {
3194 .bs = bs,
3195 .sector_num = sector_num,
3196 .nb_sectors = nb_sectors,
3197 .ret = NOT_DONE,
3198 };
3199
3200 if (qemu_in_coroutine()) {
3201 /* Fast-path if already in coroutine context */
3202 bdrv_discard_co_entry(&rwco);
3203 } else {
3204 co = qemu_coroutine_create(bdrv_discard_co_entry);
3205 qemu_coroutine_enter(co, &rwco);
3206 while (rwco.ret == NOT_DONE) {
3207 qemu_aio_wait();
3208 }
3209 }
3210
3211 return rwco.ret;
3212}
3213
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003214/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003215/* removable device support */
3216
3217/**
3218 * Return TRUE if the media is present
3219 */
3220int bdrv_is_inserted(BlockDriverState *bs)
3221{
3222 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003223
bellard19cb3732006-08-19 11:45:59 +00003224 if (!drv)
3225 return 0;
3226 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003227 return 1;
3228 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003229}
3230
3231/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003232 * Return whether the media changed since the last call to this
3233 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003234 */
3235int bdrv_media_changed(BlockDriverState *bs)
3236{
3237 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003238
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003239 if (drv && drv->bdrv_media_changed) {
3240 return drv->bdrv_media_changed(bs);
3241 }
3242 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003243}
3244
3245/**
3246 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3247 */
Markus Armbrusterfdec4402011-09-06 18:58:45 +02003248void bdrv_eject(BlockDriverState *bs, int eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003249{
3250 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003251
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003252 if (drv && drv->bdrv_eject) {
3253 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003254 }
bellard19cb3732006-08-19 11:45:59 +00003255}
3256
bellard19cb3732006-08-19 11:45:59 +00003257/**
3258 * Lock or unlock the media (if it is locked, the user won't be able
3259 * to eject it manually).
3260 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003261void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003262{
3263 BlockDriver *drv = bs->drv;
3264
Markus Armbruster025e8492011-09-06 18:58:47 +02003265 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003266
Markus Armbruster025e8492011-09-06 18:58:47 +02003267 if (drv && drv->bdrv_lock_medium) {
3268 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003269 }
3270}
ths985a03b2007-12-24 16:10:43 +00003271
3272/* needed for generic scsi interface */
3273
3274int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3275{
3276 BlockDriver *drv = bs->drv;
3277
3278 if (drv && drv->bdrv_ioctl)
3279 return drv->bdrv_ioctl(bs, req, buf);
3280 return -ENOTSUP;
3281}
aliguori7d780662009-03-12 19:57:08 +00003282
aliguori221f7152009-03-28 17:28:41 +00003283BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3284 unsigned long int req, void *buf,
3285 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003286{
aliguori221f7152009-03-28 17:28:41 +00003287 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003288
aliguori221f7152009-03-28 17:28:41 +00003289 if (drv && drv->bdrv_aio_ioctl)
3290 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3291 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003292}
aliguorie268ca52009-04-22 20:20:00 +00003293
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003294void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3295{
3296 bs->buffer_alignment = align;
3297}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003298
aliguorie268ca52009-04-22 20:20:00 +00003299void *qemu_blockalign(BlockDriverState *bs, size_t size)
3300{
3301 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3302}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003303
3304void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3305{
3306 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003307
Liran Schouraaa0eb72010-01-26 10:31:48 +02003308 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003309 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003310 if (!bs->dirty_bitmap) {
3311 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3312 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3313 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003314
Anthony Liguori7267c092011-08-20 22:09:37 -05003315 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003316 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003317 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003318 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003319 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003320 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003321 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003322 }
3323}
3324
3325int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3326{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003327 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003328
Jan Kiszkac6d22832009-11-30 18:21:20 +01003329 if (bs->dirty_bitmap &&
3330 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003331 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3332 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003333 } else {
3334 return 0;
3335 }
3336}
3337
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003338void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3339 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003340{
3341 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3342}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003343
3344int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3345{
3346 return bs->dirty_count;
3347}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003348
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003349void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3350{
3351 assert(bs->in_use != in_use);
3352 bs->in_use = in_use;
3353}
3354
3355int bdrv_in_use(BlockDriverState *bs)
3356{
3357 return bs->in_use;
3358}
3359
Luiz Capitulino28a72822011-09-26 17:43:50 -03003360void bdrv_iostatus_enable(BlockDriverState *bs)
3361{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003362 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003363 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003364}
3365
3366/* The I/O status is only enabled if the drive explicitly
3367 * enables it _and_ the VM is configured to stop on errors */
3368bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3369{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003370 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003371 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3372 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3373 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3374}
3375
3376void bdrv_iostatus_disable(BlockDriverState *bs)
3377{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003378 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003379}
3380
3381void bdrv_iostatus_reset(BlockDriverState *bs)
3382{
3383 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003384 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003385 }
3386}
3387
3388/* XXX: Today this is set by device models because it makes the implementation
3389 quite simple. However, the block layer knows about the error, so it's
3390 possible to implement this without device models being involved */
3391void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3392{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003393 if (bdrv_iostatus_is_enabled(bs) &&
3394 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003395 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003396 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3397 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003398 }
3399}
3400
Christoph Hellwiga597e792011-08-25 08:26:01 +02003401void
3402bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3403 enum BlockAcctType type)
3404{
3405 assert(type < BDRV_MAX_IOTYPE);
3406
3407 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003408 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003409 cookie->type = type;
3410}
3411
3412void
3413bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3414{
3415 assert(cookie->type < BDRV_MAX_IOTYPE);
3416
3417 bs->nr_bytes[cookie->type] += cookie->bytes;
3418 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003419 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003420}
3421
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003422int bdrv_img_create(const char *filename, const char *fmt,
3423 const char *base_filename, const char *base_fmt,
3424 char *options, uint64_t img_size, int flags)
3425{
3426 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003427 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003428 BlockDriverState *bs = NULL;
3429 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003430 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003431 int ret = 0;
3432
3433 /* Find driver and parse its options */
3434 drv = bdrv_find_format(fmt);
3435 if (!drv) {
3436 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003437 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003438 goto out;
3439 }
3440
3441 proto_drv = bdrv_find_protocol(filename);
3442 if (!proto_drv) {
3443 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003444 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003445 goto out;
3446 }
3447
3448 create_options = append_option_parameters(create_options,
3449 drv->create_options);
3450 create_options = append_option_parameters(create_options,
3451 proto_drv->create_options);
3452
3453 /* Create parameter list with default values */
3454 param = parse_option_parameters("", create_options, param);
3455
3456 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3457
3458 /* Parse -o options */
3459 if (options) {
3460 param = parse_option_parameters(options, create_options, param);
3461 if (param == NULL) {
3462 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003463 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003464 goto out;
3465 }
3466 }
3467
3468 if (base_filename) {
3469 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3470 base_filename)) {
3471 error_report("Backing file not supported for file format '%s'",
3472 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003473 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003474 goto out;
3475 }
3476 }
3477
3478 if (base_fmt) {
3479 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3480 error_report("Backing file format not supported for file "
3481 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003482 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003483 goto out;
3484 }
3485 }
3486
Jes Sorensen792da932010-12-16 13:52:17 +01003487 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3488 if (backing_file && backing_file->value.s) {
3489 if (!strcmp(filename, backing_file->value.s)) {
3490 error_report("Error: Trying to create an image with the "
3491 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003492 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003493 goto out;
3494 }
3495 }
3496
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003497 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3498 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003499 backing_drv = bdrv_find_format(backing_fmt->value.s);
3500 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003501 error_report("Unknown backing file format '%s'",
3502 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003503 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003504 goto out;
3505 }
3506 }
3507
3508 // The size for the image must always be specified, with one exception:
3509 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02003510 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3511 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003512 if (backing_file && backing_file->value.s) {
3513 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003514 char buf[32];
3515
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003516 bs = bdrv_new("");
3517
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003518 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003519 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003520 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003521 goto out;
3522 }
3523 bdrv_get_geometry(bs, &size);
3524 size *= 512;
3525
3526 snprintf(buf, sizeof(buf), "%" PRId64, size);
3527 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3528 } else {
3529 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003530 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003531 goto out;
3532 }
3533 }
3534
3535 printf("Formatting '%s', fmt=%s ", filename, fmt);
3536 print_option_parameters(param);
3537 puts("");
3538
3539 ret = bdrv_create(drv, filename, param);
3540
3541 if (ret < 0) {
3542 if (ret == -ENOTSUP) {
3543 error_report("Formatting or formatting option not supported for "
3544 "file format '%s'", fmt);
3545 } else if (ret == -EFBIG) {
3546 error_report("The image size is too large for file format '%s'",
3547 fmt);
3548 } else {
3549 error_report("%s: error while creating %s: %s", filename, fmt,
3550 strerror(-ret));
3551 }
3552 }
3553
3554out:
3555 free_option_parameters(create_options);
3556 free_option_parameters(param);
3557
3558 if (bs) {
3559 bdrv_delete(bs);
3560 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01003561
3562 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003563}