blob: b3117ef3f1399f2c05368df4bd1472a71f983914 [file] [log] [blame]
/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010049#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
/* Flag bits accepted by bdrv_co_do_readv() and bdrv_co_do_writev() */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 1 << 0,
    BDRV_REQ_ZERO_WRITE   = 1 << 1,
} BdrvRequestFlags;
55
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020056static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000057static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000059 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000060static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000062 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020063static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010069static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000070 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010072static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000073 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010081 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010082static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000083
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080084static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
85 bool is_write, double elapsed_time, uint64_t *wait);
86static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
87 double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
89 bool is_write, int64_t *wait);
90
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010091static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000093
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010094static QLIST_HEAD(, BlockDriver) bdrv_drivers =
95 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000096
Markus Armbrusterf9092b12010-06-25 10:33:39 +020097/* The device to use for VM snapshots */
98static BlockDriverState *bs_snapshots;
99
Markus Armbrustereb852012009-10-27 18:41:44 +0100100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000103#ifdef _WIN32
/* Return 1 if filename begins with an ASCII letter followed by ':' ("c:"),
 * otherwise 0. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    return (is_lower || is_upper) && filename[1] == ':';
}
110
/*
 * Return 1 if filename is exactly a drive specifier ("d:") or starts with
 * one of the prefixes "\\.\" or "//./", otherwise 0.
 */
int is_windows_drive(const char *filename)
{
    /* bare drive letter with nothing after the colon */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
121#endif
122
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800123/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800124void bdrv_io_limits_disable(BlockDriverState *bs)
125{
126 bs->io_limits_enabled = false;
127
128 while (qemu_co_queue_next(&bs->throttled_reqs));
129
130 if (bs->block_timer) {
131 qemu_del_timer(bs->block_timer);
132 qemu_free_timer(bs->block_timer);
133 bs->block_timer = NULL;
134 }
135
136 bs->slice_start = 0;
137 bs->slice_end = 0;
138 bs->slice_time = 0;
139 memset(&bs->io_base, 0, sizeof(bs->io_base));
140}
141
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800142static void bdrv_block_timer(void *opaque)
143{
144 BlockDriverState *bs = opaque;
145
146 qemu_co_queue_next(&bs->throttled_reqs);
147}
148
149void bdrv_io_limits_enable(BlockDriverState *bs)
150{
151 qemu_co_queue_init(&bs->throttled_reqs);
152 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
153 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
154 bs->slice_start = qemu_get_clock_ns(vm_clock);
155 bs->slice_end = bs->slice_start + bs->slice_time;
156 memset(&bs->io_base, 0, sizeof(bs->io_base));
157 bs->io_limits_enabled = true;
158}
159
160bool bdrv_io_limits_enabled(BlockDriverState *bs)
161{
162 BlockIOLimit *io_limits = &bs->io_limits;
163 return io_limits->bps[BLOCK_IO_LIMIT_READ]
164 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
165 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
166 || io_limits->iops[BLOCK_IO_LIMIT_READ]
167 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
168 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
169}
170
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800171static void bdrv_io_limits_intercept(BlockDriverState *bs,
172 bool is_write, int nb_sectors)
173{
174 int64_t wait_time = -1;
175
176 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
177 qemu_co_queue_wait(&bs->throttled_reqs);
178 }
179
180 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
181 * throttled requests will not be dequeued until the current request is
182 * allowed to be serviced. So if the current request still exceeds the
183 * limits, it will be inserted to the head. All requests followed it will
184 * be still in throttled_reqs queue.
185 */
186
187 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
188 qemu_mod_timer(bs->block_timer,
189 wait_time + qemu_get_clock_ns(vm_clock));
190 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
191 }
192
193 qemu_co_queue_next(&bs->throttled_reqs);
194}
195
/*
 * Check if the path starts with "<protocol>:".
 *
 * Returns 1 if path contains a ':' anywhere, 0 otherwise. On Windows, drive
 * specifications are never treated as protocols.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') ? 1 : 0;
}
208
/*
 * Return non-zero if path is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped, and the
 * remainder is absolute when it begins with '/' (or '\\' on Windows).
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
228
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path of filename as seen from base_path.
 *
 * If filename is absolute it is copied unchanged. Otherwise the directory
 * part of base_path - everything up to the last '/' (or '\\' on Windows),
 * but never less than the "<protocol>:" prefix - is prepended to filename.
 * URLs are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* first character after an optional "<protocol>:" prefix */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* first character after the last directory separator */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix of base_path is longer */
    prefix_len = (after_sep > after_proto ? after_sep : after_proto)
                 - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
272
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500273void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000274{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100275 /* Block drivers without coroutine functions need emulation */
276 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200277 bdrv->bdrv_co_readv = bdrv_co_readv_em;
278 bdrv->bdrv_co_writev = bdrv_co_writev_em;
279
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100280 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
281 * the block driver lacks aio we need to emulate that too.
282 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200283 if (!bdrv->bdrv_aio_readv) {
284 /* add AIO emulation layer */
285 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
286 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200287 }
bellard83f64092006-08-01 16:21:11 +0000288 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200289
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100290 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000291}
bellardb3380822004-03-14 21:38:54 +0000292
293/* create a new block device (by default it is empty) */
294BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000295{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100296 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000297
Anthony Liguori7267c092011-08-20 22:09:37 -0500298 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000299 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000300 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100301 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000302 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300303 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000304 return bs;
305}
306
bellardea2384d2004-08-01 21:59:26 +0000307BlockDriver *bdrv_find_format(const char *format_name)
308{
309 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100310 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
311 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000312 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100313 }
bellardea2384d2004-08-01 21:59:26 +0000314 }
315 return NULL;
316}
317
Markus Armbrustereb852012009-10-27 18:41:44 +0100318static int bdrv_is_whitelisted(BlockDriver *drv)
319{
320 static const char *whitelist[] = {
321 CONFIG_BDRV_WHITELIST
322 };
323 const char **p;
324
325 if (!whitelist[0])
326 return 1; /* no whitelist, anything goes */
327
328 for (p = whitelist; *p; p++) {
329 if (!strcmp(drv->format_name, *p)) {
330 return 1;
331 }
332 }
333 return 0;
334}
335
336BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
337{
338 BlockDriver *drv = bdrv_find_format(format_name);
339 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
340}
341
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200342int bdrv_create(BlockDriver *drv, const char* filename,
343 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000344{
345 if (!drv->bdrv_create)
346 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200347
348 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000349}
350
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
352{
353 BlockDriver *drv;
354
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900355 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200356 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000357 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200358 }
359
360 return bdrv_create(drv, filename, options);
361}
362
bellardd5249392004-08-03 21:14:23 +0000363#ifdef _WIN32
bellard95389c82005-12-18 18:28:15 +0000364void get_tmp_filename(char *filename, int size)
bellardd5249392004-08-03 21:14:23 +0000365{
bellard3b9f94e2007-01-07 17:27:07 +0000366 char temp_dir[MAX_PATH];
ths3b46e622007-09-17 08:09:54 +0000367
bellard3b9f94e2007-01-07 17:27:07 +0000368 GetTempPath(MAX_PATH, temp_dir);
369 GetTempFileName(temp_dir, "qem", 0, filename);
bellardd5249392004-08-03 21:14:23 +0000370}
371#else
/*
 * Create a temporary file and store its name in filename (POSIX variant).
 *
 * The file is created as "vl.XXXXXX" in $TMPDIR, or in /tmp when TMPDIR is
 * unset, and is closed again straight away; only the name is handed back.
 * size is the capacity of filename in bytes.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    if (fd >= 0) {
        /* mkstemp() returns -1 on failure; never pass that to close() */
        close(fd);
    }
}
bellardd5249392004-08-03 21:14:23 +0000384#endif
bellardea2384d2004-08-01 21:59:26 +0000385
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200386/*
387 * Detect host devices. By convention, /dev/cdrom[N] is always
388 * recognized as a host CDROM.
389 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200390static BlockDriver *find_hdev_driver(const char *filename)
391{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200392 int score_max = 0, score;
393 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200394
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100395 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200396 if (d->bdrv_probe_device) {
397 score = d->bdrv_probe_device(filename);
398 if (score > score_max) {
399 score_max = score;
400 drv = d;
401 }
402 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200403 }
404
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200405 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200406}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200407
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900408BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200409{
410 BlockDriver *drv1;
411 char protocol[128];
412 int len;
413 const char *p;
414
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200415 /* TODO Drivers without bdrv_file_open must be specified explicitly */
416
Christoph Hellwig39508e72010-06-23 12:25:17 +0200417 /*
418 * XXX(hch): we really should not let host device detection
419 * override an explicit protocol specification, but moving this
420 * later breaks access to device names with colons in them.
421 * Thanks to the brain-dead persistent naming schemes on udev-
422 * based Linux systems those actually are quite common.
423 */
424 drv1 = find_hdev_driver(filename);
425 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200426 return drv1;
427 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200428
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000429 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200430 return bdrv_find_format("file");
431 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000432 p = strchr(filename, ':');
433 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200434 len = p - filename;
435 if (len > sizeof(protocol) - 1)
436 len = sizeof(protocol) - 1;
437 memcpy(protocol, filename, len);
438 protocol[len] = '\0';
439 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
440 if (drv1->protocol_name &&
441 !strcmp(drv1->protocol_name, protocol)) {
442 return drv1;
443 }
444 }
445 return NULL;
446}
447
Stefan Weilc98ac352010-07-21 21:51:51 +0200448static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000449{
bellard83f64092006-08-01 16:21:11 +0000450 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000451 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000452 uint8_t buf[2048];
453 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000454
Naphtali Spreif5edb012010-01-17 16:48:13 +0200455 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200456 if (ret < 0) {
457 *pdrv = NULL;
458 return ret;
459 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700460
Kevin Wolf08a00552010-06-01 18:37:31 +0200461 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
462 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200464 drv = bdrv_find_format("raw");
465 if (!drv) {
466 ret = -ENOENT;
467 }
468 *pdrv = drv;
469 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700470 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700471
bellard83f64092006-08-01 16:21:11 +0000472 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
473 bdrv_delete(bs);
474 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200475 *pdrv = NULL;
476 return ret;
bellard83f64092006-08-01 16:21:11 +0000477 }
478
bellardea2384d2004-08-01 21:59:26 +0000479 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200480 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100481 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000482 if (drv1->bdrv_probe) {
483 score = drv1->bdrv_probe(buf, ret, filename);
484 if (score > score_max) {
485 score_max = score;
486 drv = drv1;
487 }
bellardea2384d2004-08-01 21:59:26 +0000488 }
489 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200490 if (!drv) {
491 ret = -ENOENT;
492 }
493 *pdrv = drv;
494 return ret;
bellardea2384d2004-08-01 21:59:26 +0000495}
496
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100497/**
498 * Set the current 'total_sectors' value
499 */
500static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
501{
502 BlockDriver *drv = bs->drv;
503
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700504 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
505 if (bs->sg)
506 return 0;
507
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100508 /* query actual device if possible, otherwise just trust the hint */
509 if (drv->bdrv_getlength) {
510 int64_t length = drv->bdrv_getlength(bs);
511 if (length < 0) {
512 return length;
513 }
514 hint = length >> BDRV_SECTOR_BITS;
515 }
516
517 bs->total_sectors = hint;
518 return 0;
519}
520
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100521/**
522 * Set open flags for a given cache mode
523 *
524 * Return 0 on success, -1 if the cache mode was invalid.
525 */
526int bdrv_parse_cache_flags(const char *mode, int *flags)
527{
528 *flags &= ~BDRV_O_CACHE_MASK;
529
530 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
531 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100532 } else if (!strcmp(mode, "directsync")) {
533 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100534 } else if (!strcmp(mode, "writeback")) {
535 *flags |= BDRV_O_CACHE_WB;
536 } else if (!strcmp(mode, "unsafe")) {
537 *flags |= BDRV_O_CACHE_WB;
538 *flags |= BDRV_O_NO_FLUSH;
539 } else if (!strcmp(mode, "writethrough")) {
540 /* this is the default */
541 } else {
542 return -1;
543 }
544
545 return 0;
546}
547
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000548/**
549 * The copy-on-read flag is actually a reference count so multiple users may
550 * use the feature without worrying about clobbering its previous state.
551 * Copy-on-read stays enabled until all users have called to disable it.
552 */
553void bdrv_enable_copy_on_read(BlockDriverState *bs)
554{
555 bs->copy_on_read++;
556}
557
558void bdrv_disable_copy_on_read(BlockDriverState *bs)
559{
560 assert(bs->copy_on_read > 0);
561 bs->copy_on_read--;
562}
563
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200564/*
Kevin Wolf57915332010-04-14 15:24:50 +0200565 * Common part for opening disk images and files
566 */
567static int bdrv_open_common(BlockDriverState *bs, const char *filename,
568 int flags, BlockDriver *drv)
569{
570 int ret, open_flags;
571
572 assert(drv != NULL);
573
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100574 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
575
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200576 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100577 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200578 bs->encrypted = 0;
579 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100580 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200581 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100582 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200583 bs->buffer_alignment = 512;
584
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000585 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
586 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
587 bdrv_enable_copy_on_read(bs);
588 }
589
Kevin Wolf57915332010-04-14 15:24:50 +0200590 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100591 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200592
593 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
594 return -ENOTSUP;
595 }
596
597 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500598 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200599
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100600 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200601
602 /*
603 * Clear flags that are internal to the block layer before opening the
604 * image.
605 */
606 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
607
608 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200609 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200610 */
611 if (bs->is_temporary) {
612 open_flags |= BDRV_O_RDWR;
613 }
614
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100615 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
616
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200617 /* Open the image, either directly or using a protocol */
618 if (drv->bdrv_file_open) {
619 ret = drv->bdrv_file_open(bs, filename, open_flags);
620 } else {
621 ret = bdrv_file_open(&bs->file, filename, open_flags);
622 if (ret >= 0) {
623 ret = drv->bdrv_open(bs, open_flags);
624 }
625 }
626
Kevin Wolf57915332010-04-14 15:24:50 +0200627 if (ret < 0) {
628 goto free_and_fail;
629 }
630
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100631 ret = refresh_total_sectors(bs, bs->total_sectors);
632 if (ret < 0) {
633 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200634 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100635
Kevin Wolf57915332010-04-14 15:24:50 +0200636#ifndef _WIN32
637 if (bs->is_temporary) {
638 unlink(filename);
639 }
640#endif
641 return 0;
642
643free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200644 if (bs->file) {
645 bdrv_delete(bs->file);
646 bs->file = NULL;
647 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500648 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200649 bs->opaque = NULL;
650 bs->drv = NULL;
651 return ret;
652}
653
654/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200655 * Opens a file using a protocol (file, host_device, nbd, ...)
656 */
bellard83f64092006-08-01 16:21:11 +0000657int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000658{
bellard83f64092006-08-01 16:21:11 +0000659 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200660 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000661 int ret;
662
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900663 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200664 if (!drv) {
665 return -ENOENT;
666 }
667
bellard83f64092006-08-01 16:21:11 +0000668 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200669 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000670 if (ret < 0) {
671 bdrv_delete(bs);
672 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000673 }
aliguori71d07702009-03-03 17:37:16 +0000674 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000675 *pbs = bs;
676 return 0;
bellardea2384d2004-08-01 21:59:26 +0000677}
bellardfc01f7e2003-06-30 10:03:06 +0000678
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200679/*
680 * Opens a disk image (raw, qcow2, vmdk, ...)
681 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200682int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
683 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000684{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200685 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200686 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000687
bellard83f64092006-08-01 16:21:11 +0000688 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000689 BlockDriverState *bs1;
690 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000691 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200692 BlockDriver *bdrv_qcow2;
693 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200694 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000695
bellardea2384d2004-08-01 21:59:26 +0000696 /* if snapshot, we create a temporary backing file and open it
697 instead of opening 'filename' directly */
698
699 /* if there is a backing file, use it */
700 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200701 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000702 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000703 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000704 return ret;
bellardea2384d2004-08-01 21:59:26 +0000705 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200706 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000707
708 if (bs1->drv && bs1->drv->protocol_name)
709 is_protocol = 1;
710
bellardea2384d2004-08-01 21:59:26 +0000711 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000712
bellardea2384d2004-08-01 21:59:26 +0000713 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000714
715 /* Real path is meaningless for protocols */
716 if (is_protocol)
717 snprintf(backing_filename, sizeof(backing_filename),
718 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000719 else if (!realpath(filename, backing_filename))
720 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000721
Kevin Wolf91a073a2009-05-27 14:48:06 +0200722 bdrv_qcow2 = bdrv_find_format("qcow2");
723 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
724
Jes Sorensen3e829902010-05-27 16:20:30 +0200725 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200726 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
727 if (drv) {
728 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
729 drv->format_name);
730 }
731
732 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200733 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000734 if (ret < 0) {
735 return ret;
bellardea2384d2004-08-01 21:59:26 +0000736 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200737
bellardea2384d2004-08-01 21:59:26 +0000738 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200739 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000740 bs->is_temporary = 1;
741 }
bellard712e7872005-04-28 21:09:32 +0000742
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200743 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200744 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200745 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000746 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100747
aliguori51d7c002009-03-05 23:00:29 +0000748 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000749 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000750 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751
752 /* Open the image */
753 ret = bdrv_open_common(bs, filename, flags, drv);
754 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100755 goto unlink_and_fail;
756 }
757
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200758 /* If there is a backing file, use it */
759 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
760 char backing_filename[PATH_MAX];
761 int back_flags;
762 BlockDriver *back_drv = NULL;
763
764 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000765
766 if (path_has_protocol(bs->backing_file)) {
767 pstrcpy(backing_filename, sizeof(backing_filename),
768 bs->backing_file);
769 } else {
770 path_combine(backing_filename, sizeof(backing_filename),
771 filename, bs->backing_file);
772 }
773
774 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200775 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000776 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200777
778 /* backing files always opened read-only */
779 back_flags =
780 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
781
782 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
783 if (ret < 0) {
784 bdrv_close(bs);
785 return ret;
786 }
787 if (bs->is_temporary) {
788 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
789 } else {
790 /* base image inherits from "parent" */
791 bs->backing_hd->keep_read_only = bs->keep_read_only;
792 }
793 }
794
795 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200796 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200797 }
798
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800799 /* throttling disk I/O limits */
800 if (bs->io_limits_enabled) {
801 bdrv_io_limits_enable(bs);
802 }
803
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200804 return 0;
805
806unlink_and_fail:
807 if (bs->is_temporary) {
808 unlink(filename);
809 }
810 return ret;
811}
812
bellardfc01f7e2003-06-30 10:03:06 +0000813void bdrv_close(BlockDriverState *bs)
814{
bellard19cb3732006-08-19 11:45:59 +0000815 if (bs->drv) {
Paolo Bonzini3e914652012-03-30 13:17:11 +0200816 if (bs->job) {
817 block_job_cancel_sync(bs->job);
818 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200819 if (bs == bs_snapshots) {
820 bs_snapshots = NULL;
821 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100822 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000823 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100824 bs->backing_hd = NULL;
825 }
bellardea2384d2004-08-01 21:59:26 +0000826 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500827 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000828#ifdef _WIN32
829 if (bs->is_temporary) {
830 unlink(bs->filename);
831 }
bellard67b915a2004-03-31 23:37:16 +0000832#endif
bellardea2384d2004-08-01 21:59:26 +0000833 bs->opaque = NULL;
834 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000835 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000836
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200837 if (bs->file != NULL) {
838 bdrv_close(bs->file);
839 }
840
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200841 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000842 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800843
844 /*throttling disk I/O limits*/
845 if (bs->io_limits_enabled) {
846 bdrv_io_limits_disable(bs);
847 }
bellardb3380822004-03-14 21:38:54 +0000848}
849
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900850void bdrv_close_all(void)
851{
852 BlockDriverState *bs;
853
854 QTAILQ_FOREACH(bs, &bdrv_states, list) {
855 bdrv_close(bs);
856 }
857}
858
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000859/*
860 * Wait for pending requests to complete across all BlockDriverStates
861 *
862 * This function does not flush data to disk, use bdrv_flush_all() for that
863 * after calling this function.
864 */
865void bdrv_drain_all(void)
866{
867 BlockDriverState *bs;
868
869 qemu_aio_flush();
870
871 /* If requests are still pending there is a bug somewhere */
872 QTAILQ_FOREACH(bs, &bdrv_states, list) {
873 assert(QLIST_EMPTY(&bs->tracked_requests));
874 assert(qemu_co_queue_empty(&bs->throttled_reqs));
875 }
876}
877
Ryan Harperd22b2f42011-03-29 20:51:47 -0500878/* make a BlockDriverState anonymous by removing from bdrv_state list.
879 Also, NULL terminate the device_name to prevent double remove */
880void bdrv_make_anon(BlockDriverState *bs)
881{
882 if (bs->device_name[0] != '\0') {
883 QTAILQ_REMOVE(&bdrv_states, bs, list);
884 }
885 bs->device_name[0] = '\0';
886}
887
Jeff Cody8802d1f2012-02-28 15:54:06 -0500888/*
889 * Add new bs contents at the top of an image chain while the chain is
890 * live, while keeping required fields on the top layer.
891 *
892 * This will modify the BlockDriverState fields, and swap contents
893 * between bs_new and bs_top. Both bs_new and bs_top are modified.
894 *
Jeff Codyf6801b82012-03-27 16:30:19 -0400895 * bs_new is required to be anonymous.
896 *
Jeff Cody8802d1f2012-02-28 15:54:06 -0500897 * This function does not create any image files.
898 */
899void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
900{
901 BlockDriverState tmp;
902
Jeff Codyf6801b82012-03-27 16:30:19 -0400903 /* bs_new must be anonymous */
904 assert(bs_new->device_name[0] == '\0');
Jeff Cody8802d1f2012-02-28 15:54:06 -0500905
906 tmp = *bs_new;
907
908 /* there are some fields that need to stay on the top layer: */
909
910 /* dev info */
911 tmp.dev_ops = bs_top->dev_ops;
912 tmp.dev_opaque = bs_top->dev_opaque;
913 tmp.dev = bs_top->dev;
914 tmp.buffer_alignment = bs_top->buffer_alignment;
915 tmp.copy_on_read = bs_top->copy_on_read;
916
917 /* i/o timing parameters */
918 tmp.slice_time = bs_top->slice_time;
919 tmp.slice_start = bs_top->slice_start;
920 tmp.slice_end = bs_top->slice_end;
921 tmp.io_limits = bs_top->io_limits;
922 tmp.io_base = bs_top->io_base;
923 tmp.throttled_reqs = bs_top->throttled_reqs;
924 tmp.block_timer = bs_top->block_timer;
925 tmp.io_limits_enabled = bs_top->io_limits_enabled;
926
927 /* geometry */
928 tmp.cyls = bs_top->cyls;
929 tmp.heads = bs_top->heads;
930 tmp.secs = bs_top->secs;
931 tmp.translation = bs_top->translation;
932
933 /* r/w error */
934 tmp.on_read_error = bs_top->on_read_error;
935 tmp.on_write_error = bs_top->on_write_error;
936
937 /* i/o status */
938 tmp.iostatus_enabled = bs_top->iostatus_enabled;
939 tmp.iostatus = bs_top->iostatus;
940
941 /* keep the same entry in bdrv_states */
942 pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
943 tmp.list = bs_top->list;
944
945 /* The contents of 'tmp' will become bs_top, as we are
946 * swapping bs_new and bs_top contents. */
947 tmp.backing_hd = bs_new;
948 pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
Jeff Codyf6801b82012-03-27 16:30:19 -0400949 bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
Jeff Cody8802d1f2012-02-28 15:54:06 -0500950
951 /* swap contents of the fixed new bs and the current top */
952 *bs_new = *bs_top;
953 *bs_top = tmp;
954
Jeff Codyf6801b82012-03-27 16:30:19 -0400955 /* device_name[] was carried over from the old bs_top. bs_new
956 * shouldn't be in bdrv_states, so we need to make device_name[]
957 * reflect the anonymity of bs_new
958 */
959 bs_new->device_name[0] = '\0';
960
Jeff Cody8802d1f2012-02-28 15:54:06 -0500961 /* clear the copied fields in the new backing file */
962 bdrv_detach_dev(bs_new, bs_new->dev);
963
964 qemu_co_queue_init(&bs_new->throttled_reqs);
965 memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
966 memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
967 bdrv_iostatus_disable(bs_new);
968
969 /* we don't use bdrv_io_limits_disable() for this, because we don't want
970 * to affect or delete the block_timer, as it has been moved to bs_top */
971 bs_new->io_limits_enabled = false;
972 bs_new->block_timer = NULL;
973 bs_new->slice_time = 0;
974 bs_new->slice_start = 0;
975 bs_new->slice_end = 0;
976}
977
bellardb3380822004-03-14 21:38:54 +0000978void bdrv_delete(BlockDriverState *bs)
979{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200980 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +0200981 assert(!bs->job);
982 assert(!bs->in_use);
Markus Armbruster18846de2010-06-29 16:58:30 +0200983
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100984 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500985 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000986
bellardb3380822004-03-14 21:38:54 +0000987 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200988 if (bs->file != NULL) {
989 bdrv_delete(bs->file);
990 }
991
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200992 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500993 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000994}
995
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200996int bdrv_attach_dev(BlockDriverState *bs, void *dev)
997/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200998{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200999 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02001000 return -EBUSY;
1001 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001002 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03001003 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +02001004 return 0;
1005}
1006
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001007/* TODO qdevified devices don't use this, remove when devices are qdevified */
1008void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02001009{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001010 if (bdrv_attach_dev(bs, dev) < 0) {
1011 abort();
1012 }
1013}
1014
1015void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1016/* TODO change to DeviceState *dev when all users are qdevified */
1017{
1018 assert(bs->dev == dev);
1019 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001020 bs->dev_ops = NULL;
1021 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +02001022 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02001023}
1024
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001025/* TODO change to return DeviceState * when all users are qdevified */
1026void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02001027{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001028 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02001029}
1030
Markus Armbruster0e49de52011-08-03 15:07:41 +02001031void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1032 void *opaque)
1033{
1034 bs->dev_ops = ops;
1035 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001036 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1037 bs_snapshots = NULL;
1038 }
Markus Armbruster0e49de52011-08-03 15:07:41 +02001039}
1040
Luiz Capitulino329c0a42012-01-25 16:59:43 -02001041void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1042 BlockQMPEventAction action, int is_read)
1043{
1044 QObject *data;
1045 const char *action_str;
1046
1047 switch (action) {
1048 case BDRV_ACTION_REPORT:
1049 action_str = "report";
1050 break;
1051 case BDRV_ACTION_IGNORE:
1052 action_str = "ignore";
1053 break;
1054 case BDRV_ACTION_STOP:
1055 action_str = "stop";
1056 break;
1057 default:
1058 abort();
1059 }
1060
1061 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1062 bdrv->device_name,
1063 action_str,
1064 is_read ? "read" : "write");
1065 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1066
1067 qobject_decref(data);
1068}
1069
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001070static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1071{
1072 QObject *data;
1073
1074 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1075 bdrv_get_device_name(bs), ejected);
1076 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1077
1078 qobject_decref(data);
1079}
1080
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001081static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02001082{
Markus Armbruster145feb12011-08-03 15:07:42 +02001083 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001084 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001085 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001086 if (tray_was_closed) {
1087 /* tray open */
1088 bdrv_emit_qmp_eject_event(bs, true);
1089 }
1090 if (load) {
1091 /* tray close */
1092 bdrv_emit_qmp_eject_event(bs, false);
1093 }
Markus Armbruster145feb12011-08-03 15:07:42 +02001094 }
1095}
1096
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001097bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1098{
1099 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1100}
1101
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01001102void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1103{
1104 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1105 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1106 }
1107}
1108
Markus Armbrustere4def802011-09-06 18:58:53 +02001109bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1110{
1111 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1112 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1113 }
1114 return false;
1115}
1116
Markus Armbruster145feb12011-08-03 15:07:42 +02001117static void bdrv_dev_resize_cb(BlockDriverState *bs)
1118{
1119 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1120 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02001121 }
1122}
1123
Markus Armbrusterf1076392011-09-06 18:58:46 +02001124bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1125{
1126 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1127 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1128 }
1129 return false;
1130}
1131
aliguorie97fc192009-04-21 23:11:50 +00001132/*
1133 * Run consistency checks on an image
1134 *
Kevin Wolfe076f332010-06-29 11:43:13 +02001135 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02001136 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02001137 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00001138 */
Kevin Wolfe076f332010-06-29 11:43:13 +02001139int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +00001140{
1141 if (bs->drv->bdrv_check == NULL) {
1142 return -ENOTSUP;
1143 }
1144
Kevin Wolfe076f332010-06-29 11:43:13 +02001145 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +02001146 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +00001147}
1148
Kevin Wolf8a426612010-07-16 17:17:01 +02001149#define COMMIT_BUF_SECTORS 2048
1150
bellard33e39632003-07-06 17:15:21 +00001151/* commit COW file into the raw image */
1152int bdrv_commit(BlockDriverState *bs)
1153{
bellard19cb3732006-08-19 11:45:59 +00001154 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +02001155 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001156 int64_t sector, total_sectors;
1157 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001158 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001159 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001160 char filename[1024];
1161 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +00001162
bellard19cb3732006-08-19 11:45:59 +00001163 if (!drv)
1164 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001165
1166 if (!bs->backing_hd) {
1167 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001168 }
1169
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001170 if (bs->backing_hd->keep_read_only) {
1171 return -EACCES;
1172 }
Kevin Wolfee181192010-08-05 13:05:22 +02001173
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00001174 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1175 return -EBUSY;
1176 }
1177
Kevin Wolfee181192010-08-05 13:05:22 +02001178 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001179 ro = bs->backing_hd->read_only;
1180 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1181 open_flags = bs->backing_hd->open_flags;
1182
1183 if (ro) {
1184 /* re-open as RW */
1185 bdrv_delete(bs->backing_hd);
1186 bs->backing_hd = NULL;
1187 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001188 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1189 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001190 if (rw_ret < 0) {
1191 bdrv_delete(bs_rw);
1192 /* try to re-open read-only */
1193 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001194 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1195 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001196 if (ret < 0) {
1197 bdrv_delete(bs_ro);
1198 /* drive not functional anymore */
1199 bs->drv = NULL;
1200 return ret;
1201 }
1202 bs->backing_hd = bs_ro;
1203 return rw_ret;
1204 }
1205 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001206 }
bellardea2384d2004-08-01 21:59:26 +00001207
Jan Kiszka6ea44302009-11-30 18:21:19 +01001208 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001209 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001210
Kevin Wolf8a426612010-07-16 17:17:01 +02001211 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001212 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001213
1214 if (bdrv_read(bs, sector, buf, n) != 0) {
1215 ret = -EIO;
1216 goto ro_cleanup;
1217 }
1218
1219 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1220 ret = -EIO;
1221 goto ro_cleanup;
1222 }
bellardea2384d2004-08-01 21:59:26 +00001223 }
1224 }
bellard95389c82005-12-18 18:28:15 +00001225
Christoph Hellwig1d449522010-01-17 12:32:30 +01001226 if (drv->bdrv_make_empty) {
1227 ret = drv->bdrv_make_empty(bs);
1228 bdrv_flush(bs);
1229 }
bellard95389c82005-12-18 18:28:15 +00001230
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001231 /*
1232 * Make sure all data we wrote to the backing device is actually
1233 * stable on disk.
1234 */
1235 if (bs->backing_hd)
1236 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001237
1238ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001239 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001240
1241 if (ro) {
1242 /* re-open as RO */
1243 bdrv_delete(bs->backing_hd);
1244 bs->backing_hd = NULL;
1245 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001246 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1247 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001248 if (ret < 0) {
1249 bdrv_delete(bs_ro);
1250 /* drive not functional anymore */
1251 bs->drv = NULL;
1252 return ret;
1253 }
1254 bs->backing_hd = bs_ro;
1255 bs->backing_hd->keep_read_only = 0;
1256 }
1257
Christoph Hellwig1d449522010-01-17 12:32:30 +01001258 return ret;
bellard33e39632003-07-06 17:15:21 +00001259}
1260
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001261int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001262{
1263 BlockDriverState *bs;
1264
1265 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001266 int ret = bdrv_commit(bs);
1267 if (ret < 0) {
1268 return ret;
1269 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001270 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001271 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001272}
1273
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001274struct BdrvTrackedRequest {
1275 BlockDriverState *bs;
1276 int64_t sector_num;
1277 int nb_sectors;
1278 bool is_write;
1279 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001280 Coroutine *co; /* owner, used for deadlock detection */
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001281 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001282};
1283
1284/**
1285 * Remove an active request from the tracked requests list
1286 *
1287 * This function should be called when a tracked request is completing.
1288 */
1289static void tracked_request_end(BdrvTrackedRequest *req)
1290{
1291 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001292 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001293}
1294
1295/**
1296 * Add an active request to the tracked requests list
1297 */
1298static void tracked_request_begin(BdrvTrackedRequest *req,
1299 BlockDriverState *bs,
1300 int64_t sector_num,
1301 int nb_sectors, bool is_write)
1302{
1303 *req = (BdrvTrackedRequest){
1304 .bs = bs,
1305 .sector_num = sector_num,
1306 .nb_sectors = nb_sectors,
1307 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001308 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001309 };
1310
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001311 qemu_co_queue_init(&req->wait_queue);
1312
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001313 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1314}
1315
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001316/**
1317 * Round a region to cluster boundaries
1318 */
1319static void round_to_clusters(BlockDriverState *bs,
1320 int64_t sector_num, int nb_sectors,
1321 int64_t *cluster_sector_num,
1322 int *cluster_nb_sectors)
1323{
1324 BlockDriverInfo bdi;
1325
1326 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1327 *cluster_sector_num = sector_num;
1328 *cluster_nb_sectors = nb_sectors;
1329 } else {
1330 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1331 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1332 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1333 nb_sectors, c);
1334 }
1335}
1336
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001337static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1338 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001339 /* aaaa bbbb */
1340 if (sector_num >= req->sector_num + req->nb_sectors) {
1341 return false;
1342 }
1343 /* bbbb aaaa */
1344 if (req->sector_num >= sector_num + nb_sectors) {
1345 return false;
1346 }
1347 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001348}
1349
1350static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1351 int64_t sector_num, int nb_sectors)
1352{
1353 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001354 int64_t cluster_sector_num;
1355 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001356 bool retry;
1357
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001358 /* If we touch the same cluster it counts as an overlap. This guarantees
1359 * that allocating writes will be serialized and not race with each other
1360 * for the same cluster. For example, in copy-on-read it ensures that the
1361 * CoR read and write operations are atomic and guest writes cannot
1362 * interleave between them.
1363 */
1364 round_to_clusters(bs, sector_num, nb_sectors,
1365 &cluster_sector_num, &cluster_nb_sectors);
1366
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001367 do {
1368 retry = false;
1369 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001370 if (tracked_request_overlaps(req, cluster_sector_num,
1371 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001372 /* Hitting this means there was a reentrant request, for
1373 * example, a block driver issuing nested requests. This must
1374 * never happen since it means deadlock.
1375 */
1376 assert(qemu_coroutine_self() != req->co);
1377
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001378 qemu_co_queue_wait(&req->wait_queue);
1379 retry = true;
1380 break;
1381 }
1382 }
1383 } while (retry);
1384}
1385
Kevin Wolf756e6732010-01-12 12:55:17 +01001386/*
1387 * Return values:
1388 * 0 - success
1389 * -EINVAL - backing format specified, but no file
1390 * -ENOSPC - can't update the backing file because no space is left in the
1391 * image file header
1392 * -ENOTSUP - format driver doesn't support changing the backing file
1393 */
1394int bdrv_change_backing_file(BlockDriverState *bs,
1395 const char *backing_file, const char *backing_fmt)
1396{
1397 BlockDriver *drv = bs->drv;
1398
1399 if (drv->bdrv_change_backing_file != NULL) {
1400 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1401 } else {
1402 return -ENOTSUP;
1403 }
1404}
1405
aliguori71d07702009-03-03 17:37:16 +00001406static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1407 size_t size)
1408{
1409 int64_t len;
1410
1411 if (!bdrv_is_inserted(bs))
1412 return -ENOMEDIUM;
1413
1414 if (bs->growable)
1415 return 0;
1416
1417 len = bdrv_getlength(bs);
1418
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001419 if (offset < 0)
1420 return -EIO;
1421
1422 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001423 return -EIO;
1424
1425 return 0;
1426}
1427
1428static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1429 int nb_sectors)
1430{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001431 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1432 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001433}
1434
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001435typedef struct RwCo {
1436 BlockDriverState *bs;
1437 int64_t sector_num;
1438 int nb_sectors;
1439 QEMUIOVector *qiov;
1440 bool is_write;
1441 int ret;
1442} RwCo;
1443
1444static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1445{
1446 RwCo *rwco = opaque;
1447
1448 if (!rwco->is_write) {
1449 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001450 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001451 } else {
1452 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001453 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001454 }
1455}
1456
1457/*
1458 * Process a synchronous request using coroutines
1459 */
1460static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1461 int nb_sectors, bool is_write)
1462{
1463 QEMUIOVector qiov;
1464 struct iovec iov = {
1465 .iov_base = (void *)buf,
1466 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1467 };
1468 Coroutine *co;
1469 RwCo rwco = {
1470 .bs = bs,
1471 .sector_num = sector_num,
1472 .nb_sectors = nb_sectors,
1473 .qiov = &qiov,
1474 .is_write = is_write,
1475 .ret = NOT_DONE,
1476 };
1477
1478 qemu_iovec_init_external(&qiov, &iov, 1);
1479
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001480 /**
1481 * In sync call context, when the vcpu is blocked, this throttling timer
1482 * will not fire; so the I/O throttling function has to be disabled here
1483 * if it has been enabled.
1484 */
1485 if (bs->io_limits_enabled) {
1486 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1487 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1488 bdrv_io_limits_disable(bs);
1489 }
1490
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001491 if (qemu_in_coroutine()) {
1492 /* Fast-path if already in coroutine context */
1493 bdrv_rw_co_entry(&rwco);
1494 } else {
1495 co = qemu_coroutine_create(bdrv_rw_co_entry);
1496 qemu_coroutine_enter(co, &rwco);
1497 while (rwco.ret == NOT_DONE) {
1498 qemu_aio_wait();
1499 }
1500 }
1501 return rwco.ret;
1502}
1503
bellard19cb3732006-08-19 11:45:59 +00001504/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001505int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001506 uint8_t *buf, int nb_sectors)
1507{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001508 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001509}
1510
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001511static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001512 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001513{
1514 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001515 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001516
Jan Kiszka6ea44302009-11-30 18:21:19 +01001517 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001518 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001519
1520 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001521 idx = start / (sizeof(unsigned long) * 8);
1522 bit = start % (sizeof(unsigned long) * 8);
1523 val = bs->dirty_bitmap[idx];
1524 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001525 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001526 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001527 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001528 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001529 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001530 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001531 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001532 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001533 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001534 }
1535 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001536 }
1537}
1538
ths5fafdf22007-09-16 21:08:06 +00001539/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001540 -EIO generic I/O error (may happen for all errors)
1541 -ENOMEDIUM No media inserted.
1542 -EINVAL Invalid sector number or nb_sectors
1543 -EACCES Trying to write a read-only device
1544*/
ths5fafdf22007-09-16 21:08:06 +00001545int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001546 const uint8_t *buf, int nb_sectors)
1547{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001548 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001549}
1550
aliguorieda578e2009-03-12 19:57:16 +00001551int bdrv_pread(BlockDriverState *bs, int64_t offset,
1552 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001553{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001554 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001555 int len, nb_sectors, count;
1556 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001557 int ret;
bellard83f64092006-08-01 16:21:11 +00001558
1559 count = count1;
1560 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001561 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001562 if (len > count)
1563 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001564 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001565 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001566 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1567 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001568 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001569 count -= len;
1570 if (count == 0)
1571 return count1;
1572 sector_num++;
1573 buf += len;
1574 }
1575
1576 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001577 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001578 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001579 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1580 return ret;
bellard83f64092006-08-01 16:21:11 +00001581 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001582 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001583 buf += len;
1584 count -= len;
1585 }
1586
1587 /* add data from the last sector */
1588 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001589 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1590 return ret;
bellard83f64092006-08-01 16:21:11 +00001591 memcpy(buf, tmp_buf, count);
1592 }
1593 return count1;
1594}
1595
aliguorieda578e2009-03-12 19:57:16 +00001596int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1597 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001598{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001599 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001600 int len, nb_sectors, count;
1601 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001602 int ret;
bellard83f64092006-08-01 16:21:11 +00001603
1604 count = count1;
1605 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001606 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001607 if (len > count)
1608 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001609 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001610 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001611 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1612 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001613 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001614 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1615 return ret;
bellard83f64092006-08-01 16:21:11 +00001616 count -= len;
1617 if (count == 0)
1618 return count1;
1619 sector_num++;
1620 buf += len;
1621 }
1622
1623 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001624 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001625 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001626 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1627 return ret;
bellard83f64092006-08-01 16:21:11 +00001628 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001629 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001630 buf += len;
1631 count -= len;
1632 }
1633
1634 /* add data from the last sector */
1635 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001636 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1637 return ret;
bellard83f64092006-08-01 16:21:11 +00001638 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001639 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1640 return ret;
bellard83f64092006-08-01 16:21:11 +00001641 }
1642 return count1;
1643}
bellard83f64092006-08-01 16:21:11 +00001644
Kevin Wolff08145f2010-06-16 16:38:15 +02001645/*
1646 * Writes to the file and ensures that no writes are reordered across this
1647 * request (acts as a barrier)
1648 *
1649 * Returns 0 on success, -errno in error cases.
1650 */
1651int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1652 const void *buf, int count)
1653{
1654 int ret;
1655
1656 ret = bdrv_pwrite(bs, offset, buf, count);
1657 if (ret < 0) {
1658 return ret;
1659 }
1660
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001661 /* No flush needed for cache modes that use O_DSYNC */
1662 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001663 bdrv_flush(bs);
1664 }
1665
1666 return 0;
1667}
1668
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001669static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001670 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1671{
1672 /* Perform I/O through a temporary buffer so that users who scribble over
1673 * their read buffer while the operation is in progress do not end up
1674 * modifying the image file. This is critical for zero-copy guest I/O
1675 * where anything might happen inside guest memory.
1676 */
1677 void *bounce_buffer;
1678
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001679 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00001680 struct iovec iov;
1681 QEMUIOVector bounce_qiov;
1682 int64_t cluster_sector_num;
1683 int cluster_nb_sectors;
1684 size_t skip_bytes;
1685 int ret;
1686
1687 /* Cover entire cluster so no additional backing file I/O is required when
1688 * allocating cluster in the image file.
1689 */
1690 round_to_clusters(bs, sector_num, nb_sectors,
1691 &cluster_sector_num, &cluster_nb_sectors);
1692
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001693 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1694 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001695
1696 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1697 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1698 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1699
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001700 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1701 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001702 if (ret < 0) {
1703 goto err;
1704 }
1705
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001706 if (drv->bdrv_co_write_zeroes &&
1707 buffer_is_zero(bounce_buffer, iov.iov_len)) {
1708 ret = drv->bdrv_co_write_zeroes(bs, cluster_sector_num,
1709 cluster_nb_sectors);
1710 } else {
1711 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001712 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001713 }
1714
Stefan Hajnocziab185922011-11-17 13:40:31 +00001715 if (ret < 0) {
1716 /* It might be okay to ignore write errors for guest requests. If this
1717 * is a deliberate copy-on-read then we don't want to ignore the error.
1718 * Simply report it in all cases.
1719 */
1720 goto err;
1721 }
1722
1723 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1724 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1725 nb_sectors * BDRV_SECTOR_SIZE);
1726
1727err:
1728 qemu_vfree(bounce_buffer);
1729 return ret;
1730}
1731
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001732/*
1733 * Handle a read request in coroutine context
1734 */
1735static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001736 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1737 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001738{
1739 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001740 BdrvTrackedRequest req;
1741 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001742
Kevin Wolfda1fa912011-07-14 17:27:13 +02001743 if (!drv) {
1744 return -ENOMEDIUM;
1745 }
1746 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1747 return -EIO;
1748 }
1749
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001750 /* throttling disk read I/O */
1751 if (bs->io_limits_enabled) {
1752 bdrv_io_limits_intercept(bs, false, nb_sectors);
1753 }
1754
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001755 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001756 flags |= BDRV_REQ_COPY_ON_READ;
1757 }
1758 if (flags & BDRV_REQ_COPY_ON_READ) {
1759 bs->copy_on_read_in_flight++;
1760 }
1761
1762 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001763 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1764 }
1765
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001766 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001767
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001768 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00001769 int pnum;
1770
1771 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1772 if (ret < 0) {
1773 goto out;
1774 }
1775
1776 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001777 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001778 goto out;
1779 }
1780 }
1781
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001782 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001783
1784out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001785 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001786
1787 if (flags & BDRV_REQ_COPY_ON_READ) {
1788 bs->copy_on_read_in_flight--;
1789 }
1790
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001791 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001792}
1793
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001794int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001795 int nb_sectors, QEMUIOVector *qiov)
1796{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001797 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001798
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001799 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1800}
1801
1802int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1803 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1804{
1805 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1806
1807 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1808 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001809}
1810
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001811static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1812 int64_t sector_num, int nb_sectors)
1813{
1814 BlockDriver *drv = bs->drv;
1815 QEMUIOVector qiov;
1816 struct iovec iov;
1817 int ret;
1818
1819 /* First try the efficient write zeroes operation */
1820 if (drv->bdrv_co_write_zeroes) {
1821 return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1822 }
1823
1824 /* Fall back to bounce buffer if write zeroes is unsupported */
1825 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1826 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1827 memset(iov.iov_base, 0, iov.iov_len);
1828 qemu_iovec_init_external(&qiov, &iov, 1);
1829
1830 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1831
1832 qemu_vfree(iov.iov_base);
1833 return ret;
1834}
1835
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001836/*
1837 * Handle a write request in coroutine context
1838 */
1839static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001840 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1841 BdrvRequestFlags flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001842{
1843 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001844 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001845 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001846
1847 if (!bs->drv) {
1848 return -ENOMEDIUM;
1849 }
1850 if (bs->read_only) {
1851 return -EACCES;
1852 }
1853 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1854 return -EIO;
1855 }
1856
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001857 /* throttling disk write I/O */
1858 if (bs->io_limits_enabled) {
1859 bdrv_io_limits_intercept(bs, true, nb_sectors);
1860 }
1861
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001862 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001863 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1864 }
1865
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001866 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1867
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001868 if (flags & BDRV_REQ_ZERO_WRITE) {
1869 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1870 } else {
1871 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1872 }
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001873
Kevin Wolfda1fa912011-07-14 17:27:13 +02001874 if (bs->dirty_bitmap) {
1875 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1876 }
1877
1878 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1879 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1880 }
1881
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001882 tracked_request_end(&req);
1883
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001884 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001885}
1886
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001887int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1888 int nb_sectors, QEMUIOVector *qiov)
1889{
1890 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1891
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001892 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
1893}
1894
1895int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
1896 int64_t sector_num, int nb_sectors)
1897{
1898 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1899
1900 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
1901 BDRV_REQ_ZERO_WRITE);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001902}
1903
bellard83f64092006-08-01 16:21:11 +00001904/**
bellard83f64092006-08-01 16:21:11 +00001905 * Truncate file to 'offset' bytes (needed only for file protocols)
1906 */
1907int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1908{
1909 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001910 int ret;
bellard83f64092006-08-01 16:21:11 +00001911 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001912 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001913 if (!drv->bdrv_truncate)
1914 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001915 if (bs->read_only)
1916 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001917 if (bdrv_in_use(bs))
1918 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001919 ret = drv->bdrv_truncate(bs, offset);
1920 if (ret == 0) {
1921 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001922 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001923 }
1924 return ret;
bellard83f64092006-08-01 16:21:11 +00001925}
1926
1927/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001928 * Length of a allocated file in bytes. Sparse files are counted by actual
1929 * allocated space. Return < 0 if error or unknown.
1930 */
1931int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1932{
1933 BlockDriver *drv = bs->drv;
1934 if (!drv) {
1935 return -ENOMEDIUM;
1936 }
1937 if (drv->bdrv_get_allocated_file_size) {
1938 return drv->bdrv_get_allocated_file_size(bs);
1939 }
1940 if (bs->file) {
1941 return bdrv_get_allocated_file_size(bs->file);
1942 }
1943 return -ENOTSUP;
1944}
1945
1946/**
bellard83f64092006-08-01 16:21:11 +00001947 * Length of a file in bytes. Return < 0 if error or unknown.
1948 */
1949int64_t bdrv_getlength(BlockDriverState *bs)
1950{
1951 BlockDriver *drv = bs->drv;
1952 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001953 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001954
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001955 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001956 if (drv->bdrv_getlength) {
1957 return drv->bdrv_getlength(bs);
1958 }
bellard83f64092006-08-01 16:21:11 +00001959 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001960 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001961}
1962
bellard19cb3732006-08-19 11:45:59 +00001963/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001964void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001965{
bellard19cb3732006-08-19 11:45:59 +00001966 int64_t length;
1967 length = bdrv_getlength(bs);
1968 if (length < 0)
1969 length = 0;
1970 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001971 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001972 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001973}
bellardcf989512004-02-16 21:56:36 +00001974
/* One entry of the MSDOS partition table, as laid out on disk (packed). */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001987
1988/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1989static int guess_disk_lchs(BlockDriverState *bs,
1990 int *pcylinders, int *pheads, int *psectors)
1991{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001992 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001993 int ret, i, heads, sectors, cylinders;
1994 struct partition *p;
1995 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001996 uint64_t nb_sectors;
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001997 bool enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00001998
1999 bdrv_get_geometry(bs, &nb_sectors);
2000
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002001 /**
2002 * The function will be invoked during startup not only in sync I/O mode,
2003 * but also in async I/O mode. So the I/O throttling function has to
2004 * be disabled temporarily here, not permanently.
2005 */
2006 enabled = bs->io_limits_enabled;
2007 bs->io_limits_enabled = false;
aliguorif3d54fc2008-11-25 21:50:24 +00002008 ret = bdrv_read(bs, 0, buf, 1);
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002009 bs->io_limits_enabled = enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00002010 if (ret < 0)
2011 return -1;
2012 /* test msdos magic */
2013 if (buf[510] != 0x55 || buf[511] != 0xaa)
2014 return -1;
2015 for(i = 0; i < 4; i++) {
2016 p = ((struct partition *)(buf + 0x1be)) + i;
2017 nr_sects = le32_to_cpu(p->nr_sects);
2018 if (nr_sects && p->end_head) {
2019 /* We make the assumption that the partition terminates on
2020 a cylinder boundary */
2021 heads = p->end_head + 1;
2022 sectors = p->end_sector & 63;
2023 if (sectors == 0)
2024 continue;
2025 cylinders = nb_sectors / (heads * sectors);
2026 if (cylinders < 1 || cylinders > 16383)
2027 continue;
2028 *pheads = heads;
2029 *psectors = sectors;
2030 *pcylinders = cylinders;
2031#if 0
2032 printf("guessed geometry: LCHS=%d %d %d\n",
2033 cylinders, heads, sectors);
2034#endif
2035 return 0;
2036 }
2037 }
2038 return -1;
2039}
2040
2041void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2042{
2043 int translation, lba_detected = 0;
2044 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00002045 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00002046
2047 /* if a geometry hint is available, use it */
2048 bdrv_get_geometry(bs, &nb_sectors);
2049 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2050 translation = bdrv_get_translation_hint(bs);
2051 if (cylinders != 0) {
2052 *pcyls = cylinders;
2053 *pheads = heads;
2054 *psecs = secs;
2055 } else {
2056 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2057 if (heads > 16) {
2058 /* if heads > 16, it means that a BIOS LBA
2059 translation was active, so the default
2060 hardware geometry is OK */
2061 lba_detected = 1;
2062 goto default_geometry;
2063 } else {
2064 *pcyls = cylinders;
2065 *pheads = heads;
2066 *psecs = secs;
2067 /* disable any translation to be in sync with
2068 the logical geometry */
2069 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2070 bdrv_set_translation_hint(bs,
2071 BIOS_ATA_TRANSLATION_NONE);
2072 }
2073 }
2074 } else {
2075 default_geometry:
2076 /* if no geometry, use a standard physical disk geometry */
2077 cylinders = nb_sectors / (16 * 63);
2078
2079 if (cylinders > 16383)
2080 cylinders = 16383;
2081 else if (cylinders < 2)
2082 cylinders = 2;
2083 *pcyls = cylinders;
2084 *pheads = 16;
2085 *psecs = 63;
2086 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2087 if ((*pcyls * *pheads) <= 131072) {
2088 bdrv_set_translation_hint(bs,
2089 BIOS_ATA_TRANSLATION_LARGE);
2090 } else {
2091 bdrv_set_translation_hint(bs,
2092 BIOS_ATA_TRANSLATION_LBA);
2093 }
2094 }
2095 }
2096 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2097 }
2098}
2099
ths5fafdf22007-09-16 21:08:06 +00002100void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002101 int cyls, int heads, int secs)
2102{
2103 bs->cyls = cyls;
2104 bs->heads = heads;
2105 bs->secs = secs;
2106}
2107
bellard46d47672004-11-16 01:45:27 +00002108void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2109{
2110 bs->translation = translation;
2111}
2112
ths5fafdf22007-09-16 21:08:06 +00002113void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002114 int *pcyls, int *pheads, int *psecs)
2115{
2116 *pcyls = bs->cyls;
2117 *pheads = bs->heads;
2118 *psecs = bs->secs;
2119}
2120
Zhi Yong Wu0563e192011-11-03 16:57:25 +08002121/* throttling disk io limits */
2122void bdrv_set_io_limits(BlockDriverState *bs,
2123 BlockIOLimit *io_limits)
2124{
2125 bs->io_limits = *io_limits;
2126 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2127}
2128
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002129/* Recognize floppy formats */
2130typedef struct FDFormat {
2131 FDriveType drive;
2132 uint8_t last_sect;
2133 uint8_t max_track;
2134 uint8_t max_head;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002135 FDriveRate rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002136} FDFormat;
2137
2138static const FDFormat fd_formats[] = {
2139 /* First entry is default format */
2140 /* 1.44 MB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002141 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2142 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2143 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2144 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2145 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2146 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2147 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2148 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002149 /* 2.88 MB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002150 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2151 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2152 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2153 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2154 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002155 /* 720 kB 3"1/2 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002156 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2157 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2158 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2159 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2160 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2161 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002162 /* 1.2 MB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002163 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2164 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2165 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2166 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2167 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002168 /* 720 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002169 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2170 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002171 /* 360 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002172 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2173 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2174 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2175 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002176 /* 320 kB 5"1/4 floppy disks */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002177 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2178 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002179 /* 360 kB must match 5"1/4 better than 3"1/2... */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002180 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002181 /* end */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002182 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002183};
2184
2185void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2186 int *max_track, int *last_sect,
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002187 FDriveType drive_in, FDriveType *drive,
2188 FDriveRate *rate)
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002189{
2190 const FDFormat *parse;
2191 uint64_t nb_sectors, size;
2192 int i, first_match, match;
2193
2194 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2195 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2196 /* User defined disk */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002197 *rate = FDRIVE_RATE_500K;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002198 } else {
2199 bdrv_get_geometry(bs, &nb_sectors);
2200 match = -1;
2201 first_match = -1;
2202 for (i = 0; ; i++) {
2203 parse = &fd_formats[i];
2204 if (parse->drive == FDRIVE_DRV_NONE) {
2205 break;
2206 }
2207 if (drive_in == parse->drive ||
2208 drive_in == FDRIVE_DRV_NONE) {
2209 size = (parse->max_head + 1) * parse->max_track *
2210 parse->last_sect;
2211 if (nb_sectors == size) {
2212 match = i;
2213 break;
2214 }
2215 if (first_match == -1) {
2216 first_match = i;
2217 }
2218 }
2219 }
2220 if (match == -1) {
2221 if (first_match == -1) {
2222 match = 1;
2223 } else {
2224 match = first_match;
2225 }
2226 parse = &fd_formats[match];
2227 }
2228 *nb_heads = parse->max_head + 1;
2229 *max_track = parse->max_track;
2230 *last_sect = parse->last_sect;
2231 *drive = parse->drive;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002232 *rate = parse->rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002233 }
2234}
2235
bellard46d47672004-11-16 01:45:27 +00002236int bdrv_get_translation_hint(BlockDriverState *bs)
2237{
2238 return bs->translation;
2239}
2240
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002241void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2242 BlockErrorAction on_write_error)
2243{
2244 bs->on_read_error = on_read_error;
2245 bs->on_write_error = on_write_error;
2246}
2247
2248BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2249{
2250 return is_read ? bs->on_read_error : bs->on_write_error;
2251}
2252
bellardb3380822004-03-14 21:38:54 +00002253int bdrv_is_read_only(BlockDriverState *bs)
2254{
2255 return bs->read_only;
2256}
2257
ths985a03b2007-12-24 16:10:43 +00002258int bdrv_is_sg(BlockDriverState *bs)
2259{
2260 return bs->sg;
2261}
2262
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002263int bdrv_enable_write_cache(BlockDriverState *bs)
2264{
2265 return bs->enable_write_cache;
2266}
2267
bellardea2384d2004-08-01 21:59:26 +00002268int bdrv_is_encrypted(BlockDriverState *bs)
2269{
2270 if (bs->backing_hd && bs->backing_hd->encrypted)
2271 return 1;
2272 return bs->encrypted;
2273}
2274
aliguoric0f4ce72009-03-05 23:01:01 +00002275int bdrv_key_required(BlockDriverState *bs)
2276{
2277 BlockDriverState *backing_hd = bs->backing_hd;
2278
2279 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2280 return 1;
2281 return (bs->encrypted && !bs->valid_key);
2282}
2283
bellardea2384d2004-08-01 21:59:26 +00002284int bdrv_set_key(BlockDriverState *bs, const char *key)
2285{
2286 int ret;
2287 if (bs->backing_hd && bs->backing_hd->encrypted) {
2288 ret = bdrv_set_key(bs->backing_hd, key);
2289 if (ret < 0)
2290 return ret;
2291 if (!bs->encrypted)
2292 return 0;
2293 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002294 if (!bs->encrypted) {
2295 return -EINVAL;
2296 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2297 return -ENOMEDIUM;
2298 }
aliguoric0f4ce72009-03-05 23:01:01 +00002299 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002300 if (ret < 0) {
2301 bs->valid_key = 0;
2302 } else if (!bs->valid_key) {
2303 bs->valid_key = 1;
2304 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002305 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002306 }
aliguoric0f4ce72009-03-05 23:01:01 +00002307 return ret;
bellardea2384d2004-08-01 21:59:26 +00002308}
2309
2310void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2311{
bellard19cb3732006-08-19 11:45:59 +00002312 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002313 buf[0] = '\0';
2314 } else {
2315 pstrcpy(buf, buf_size, bs->drv->format_name);
2316 }
2317}
2318
ths5fafdf22007-09-16 21:08:06 +00002319void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002320 void *opaque)
2321{
2322 BlockDriver *drv;
2323
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002324 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002325 it(opaque, drv->format_name);
2326 }
2327}
2328
bellardb3380822004-03-14 21:38:54 +00002329BlockDriverState *bdrv_find(const char *name)
2330{
2331 BlockDriverState *bs;
2332
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002333 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2334 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002335 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002336 }
bellardb3380822004-03-14 21:38:54 +00002337 }
2338 return NULL;
2339}
2340
Markus Armbruster2f399b02010-06-02 18:55:20 +02002341BlockDriverState *bdrv_next(BlockDriverState *bs)
2342{
2343 if (!bs) {
2344 return QTAILQ_FIRST(&bdrv_states);
2345 }
2346 return QTAILQ_NEXT(bs, list);
2347}
2348
aliguori51de9762009-03-05 23:00:43 +00002349void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002350{
2351 BlockDriverState *bs;
2352
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002353 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002354 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002355 }
2356}
2357
bellardea2384d2004-08-01 21:59:26 +00002358const char *bdrv_get_device_name(BlockDriverState *bs)
2359{
2360 return bs->device_name;
2361}
2362
aliguoric6ca28d2008-10-06 13:55:43 +00002363void bdrv_flush_all(void)
2364{
2365 BlockDriverState *bs;
2366
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002367 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01002368 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002369 }
aliguoric6ca28d2008-10-06 13:55:43 +00002370}
2371
Kevin Wolff2feebb2010-04-14 17:30:35 +02002372int bdrv_has_zero_init(BlockDriverState *bs)
2373{
2374 assert(bs->drv);
2375
Kevin Wolf336c1c12010-07-28 11:26:29 +02002376 if (bs->drv->bdrv_has_zero_init) {
2377 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002378 }
2379
2380 return 1;
2381}
2382
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002383typedef struct BdrvCoIsAllocatedData {
2384 BlockDriverState *bs;
2385 int64_t sector_num;
2386 int nb_sectors;
2387 int *pnum;
2388 int ret;
2389 bool done;
2390} BdrvCoIsAllocatedData;
2391
thsf58c7b32008-06-05 21:53:49 +00002392/*
2393 * Returns true iff the specified sector is present in the disk image. Drivers
2394 * not implementing the functionality are assumed to not support backing files,
2395 * hence all their sectors are reported as allocated.
2396 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002397 * If 'sector_num' is beyond the end of the disk image the return value is 0
2398 * and 'pnum' is set to 0.
2399 *
thsf58c7b32008-06-05 21:53:49 +00002400 * 'pnum' is set to the number of sectors (including and immediately following
2401 * the specified sector) that are known to be in the same
2402 * allocated/unallocated state.
2403 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002404 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2405 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002406 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002407int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2408 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002409{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002410 int64_t n;
2411
2412 if (sector_num >= bs->total_sectors) {
2413 *pnum = 0;
2414 return 0;
2415 }
2416
2417 n = bs->total_sectors - sector_num;
2418 if (n < nb_sectors) {
2419 nb_sectors = n;
2420 }
2421
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002422 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002423 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002424 return 1;
2425 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002426
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002427 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2428}
2429
2430/* Coroutine wrapper for bdrv_is_allocated() */
2431static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2432{
2433 BdrvCoIsAllocatedData *data = opaque;
2434 BlockDriverState *bs = data->bs;
2435
2436 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2437 data->pnum);
2438 data->done = true;
2439}
2440
2441/*
2442 * Synchronous wrapper around bdrv_co_is_allocated().
2443 *
2444 * See bdrv_co_is_allocated() for details.
2445 */
2446int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2447 int *pnum)
2448{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002449 Coroutine *co;
2450 BdrvCoIsAllocatedData data = {
2451 .bs = bs,
2452 .sector_num = sector_num,
2453 .nb_sectors = nb_sectors,
2454 .pnum = pnum,
2455 .done = false,
2456 };
2457
2458 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2459 qemu_coroutine_enter(co, &data);
2460 while (!data.done) {
2461 qemu_aio_wait();
2462 }
2463 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002464}
2465
Luiz Capitulinob2023812011-09-21 17:16:47 -03002466BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002467{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002468 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002469 BlockDriverState *bs;
2470
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002471 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002472 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002473
Luiz Capitulinob2023812011-09-21 17:16:47 -03002474 info->value = g_malloc0(sizeof(*info->value));
2475 info->value->device = g_strdup(bs->device_name);
2476 info->value->type = g_strdup("unknown");
2477 info->value->locked = bdrv_dev_is_medium_locked(bs);
2478 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002479
Markus Armbrustere4def802011-09-06 18:58:53 +02002480 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002481 info->value->has_tray_open = true;
2482 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002483 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002484
2485 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002486 info->value->has_io_status = true;
2487 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002488 }
2489
bellard19cb3732006-08-19 11:45:59 +00002490 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002491 info->value->has_inserted = true;
2492 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2493 info->value->inserted->file = g_strdup(bs->filename);
2494 info->value->inserted->ro = bs->read_only;
2495 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2496 info->value->inserted->encrypted = bs->encrypted;
2497 if (bs->backing_file[0]) {
2498 info->value->inserted->has_backing_file = true;
2499 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002500 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002501
2502 if (bs->io_limits_enabled) {
2503 info->value->inserted->bps =
2504 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2505 info->value->inserted->bps_rd =
2506 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2507 info->value->inserted->bps_wr =
2508 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2509 info->value->inserted->iops =
2510 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2511 info->value->inserted->iops_rd =
2512 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2513 info->value->inserted->iops_wr =
2514 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2515 }
bellardb3380822004-03-14 21:38:54 +00002516 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002517
2518 /* XXX: waiting for the qapi to support GSList */
2519 if (!cur_item) {
2520 head = cur_item = info;
2521 } else {
2522 cur_item->next = info;
2523 cur_item = info;
2524 }
bellardb3380822004-03-14 21:38:54 +00002525 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002526
Luiz Capitulinob2023812011-09-21 17:16:47 -03002527 return head;
bellardb3380822004-03-14 21:38:54 +00002528}
thsa36e69d2007-12-02 05:18:19 +00002529
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002530/* Consider exposing this as a full fledged QMP command */
2531static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002532{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002533 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002534
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002535 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002536
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002537 if (bs->device_name[0]) {
2538 s->has_device = true;
2539 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002540 }
2541
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002542 s->stats = g_malloc0(sizeof(*s->stats));
2543 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2544 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2545 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2546 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2547 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2548 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2549 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2550 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2551 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2552
Kevin Wolf294cc352010-04-28 14:34:01 +02002553 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002554 s->has_parent = true;
2555 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002556 }
2557
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002558 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002559}
2560
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002561BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002562{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002563 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002564 BlockDriverState *bs;
2565
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002566 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002567 BlockStatsList *info = g_malloc0(sizeof(*info));
2568 info->value = qmp_query_blockstat(bs, NULL);
2569
2570 /* XXX: waiting for the qapi to support GSList */
2571 if (!cur_item) {
2572 head = cur_item = info;
2573 } else {
2574 cur_item->next = info;
2575 cur_item = info;
2576 }
thsa36e69d2007-12-02 05:18:19 +00002577 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002578
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002579 return head;
thsa36e69d2007-12-02 05:18:19 +00002580}
bellardea2384d2004-08-01 21:59:26 +00002581
aliguori045df332009-03-05 23:00:48 +00002582const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2583{
2584 if (bs->backing_hd && bs->backing_hd->encrypted)
2585 return bs->backing_file;
2586 else if (bs->encrypted)
2587 return bs->filename;
2588 else
2589 return NULL;
2590}
2591
ths5fafdf22007-09-16 21:08:06 +00002592void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002593 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002594{
Kevin Wolf3574c602011-10-26 11:02:11 +02002595 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002596}
2597
ths5fafdf22007-09-16 21:08:06 +00002598int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002599 const uint8_t *buf, int nb_sectors)
2600{
2601 BlockDriver *drv = bs->drv;
2602 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002603 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002604 if (!drv->bdrv_write_compressed)
2605 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002606 if (bdrv_check_request(bs, sector_num, nb_sectors))
2607 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002608
Jan Kiszkac6d22832009-11-30 18:21:20 +01002609 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002610 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2611 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002612
bellardfaea38e2006-08-05 21:31:00 +00002613 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2614}
ths3b46e622007-09-17 08:09:54 +00002615
bellardfaea38e2006-08-05 21:31:00 +00002616int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2617{
2618 BlockDriver *drv = bs->drv;
2619 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002620 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002621 if (!drv->bdrv_get_info)
2622 return -ENOTSUP;
2623 memset(bdi, 0, sizeof(*bdi));
2624 return drv->bdrv_get_info(bs, bdi);
2625}
2626
Christoph Hellwig45566e92009-07-10 23:11:57 +02002627int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2628 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002629{
2630 BlockDriver *drv = bs->drv;
2631 if (!drv)
2632 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002633 if (drv->bdrv_save_vmstate)
2634 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2635 if (bs->file)
2636 return bdrv_save_vmstate(bs->file, buf, pos, size);
2637 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002638}
2639
Christoph Hellwig45566e92009-07-10 23:11:57 +02002640int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2641 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002642{
2643 BlockDriver *drv = bs->drv;
2644 if (!drv)
2645 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002646 if (drv->bdrv_load_vmstate)
2647 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2648 if (bs->file)
2649 return bdrv_load_vmstate(bs->file, buf, pos, size);
2650 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002651}
2652
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002653void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2654{
2655 BlockDriver *drv = bs->drv;
2656
2657 if (!drv || !drv->bdrv_debug_event) {
2658 return;
2659 }
2660
2661 return drv->bdrv_debug_event(bs, event);
2662
2663}
2664
bellardfaea38e2006-08-05 21:31:00 +00002665/**************************************************************/
2666/* handling of snapshots */
2667
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002668int bdrv_can_snapshot(BlockDriverState *bs)
2669{
2670 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002671 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002672 return 0;
2673 }
2674
2675 if (!drv->bdrv_snapshot_create) {
2676 if (bs->file != NULL) {
2677 return bdrv_can_snapshot(bs->file);
2678 }
2679 return 0;
2680 }
2681
2682 return 1;
2683}
2684
Blue Swirl199630b2010-07-25 20:49:34 +00002685int bdrv_is_snapshot(BlockDriverState *bs)
2686{
2687 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2688}
2689
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002690BlockDriverState *bdrv_snapshots(void)
2691{
2692 BlockDriverState *bs;
2693
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002694 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002695 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002696 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002697
2698 bs = NULL;
2699 while ((bs = bdrv_next(bs))) {
2700 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002701 bs_snapshots = bs;
2702 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002703 }
2704 }
2705 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002706}
2707
ths5fafdf22007-09-16 21:08:06 +00002708int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002709 QEMUSnapshotInfo *sn_info)
2710{
2711 BlockDriver *drv = bs->drv;
2712 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002713 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002714 if (drv->bdrv_snapshot_create)
2715 return drv->bdrv_snapshot_create(bs, sn_info);
2716 if (bs->file)
2717 return bdrv_snapshot_create(bs->file, sn_info);
2718 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002719}
2720
ths5fafdf22007-09-16 21:08:06 +00002721int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002722 const char *snapshot_id)
2723{
2724 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002725 int ret, open_ret;
2726
bellardfaea38e2006-08-05 21:31:00 +00002727 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002728 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002729 if (drv->bdrv_snapshot_goto)
2730 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2731
2732 if (bs->file) {
2733 drv->bdrv_close(bs);
2734 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2735 open_ret = drv->bdrv_open(bs, bs->open_flags);
2736 if (open_ret < 0) {
2737 bdrv_delete(bs->file);
2738 bs->drv = NULL;
2739 return open_ret;
2740 }
2741 return ret;
2742 }
2743
2744 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002745}
2746
2747int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2748{
2749 BlockDriver *drv = bs->drv;
2750 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002751 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002752 if (drv->bdrv_snapshot_delete)
2753 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2754 if (bs->file)
2755 return bdrv_snapshot_delete(bs->file, snapshot_id);
2756 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002757}
2758
ths5fafdf22007-09-16 21:08:06 +00002759int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002760 QEMUSnapshotInfo **psn_info)
2761{
2762 BlockDriver *drv = bs->drv;
2763 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002764 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002765 if (drv->bdrv_snapshot_list)
2766 return drv->bdrv_snapshot_list(bs, psn_info);
2767 if (bs->file)
2768 return bdrv_snapshot_list(bs->file, psn_info);
2769 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002770}
2771
edison51ef6722010-09-21 19:58:41 -07002772int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2773 const char *snapshot_name)
2774{
2775 BlockDriver *drv = bs->drv;
2776 if (!drv) {
2777 return -ENOMEDIUM;
2778 }
2779 if (!bs->read_only) {
2780 return -EINVAL;
2781 }
2782 if (drv->bdrv_snapshot_load_tmp) {
2783 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2784 }
2785 return -ENOTSUP;
2786}
2787
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00002788BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2789 const char *backing_file)
2790{
2791 if (!bs->drv) {
2792 return NULL;
2793 }
2794
2795 if (bs->backing_hd) {
2796 if (strcmp(bs->backing_file, backing_file) == 0) {
2797 return bs->backing_hd;
2798 } else {
2799 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2800 }
2801 }
2802
2803 return NULL;
2804}
2805
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (capacity 'buf_size') in a compact form and
 * return 'buf'.
 *
 * Values up to 999 are printed as plain integers.  Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal digit is
 * shown while the scaled value is below 10, otherwise the value is rounded
 * to the nearest integer.  Anything too large for the T range is still
 * printed with the T suffix.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit, suffixes[idx]);
            break;
        }
        /* The last suffix also absorbs everything beyond its own range */
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit, suffixes[idx]);
            break;
        }
    }

    return buf;
}
2835
2836char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2837{
2838 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002839#ifdef _WIN32
2840 struct tm *ptm;
2841#else
bellardfaea38e2006-08-05 21:31:00 +00002842 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002843#endif
bellardfaea38e2006-08-05 21:31:00 +00002844 time_t ti;
2845 int64_t secs;
2846
2847 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002848 snprintf(buf, buf_size,
2849 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002850 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2851 } else {
2852 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002853#ifdef _WIN32
2854 ptm = localtime(&ti);
2855 strftime(date_buf, sizeof(date_buf),
2856 "%Y-%m-%d %H:%M:%S", ptm);
2857#else
bellardfaea38e2006-08-05 21:31:00 +00002858 localtime_r(&ti, &tm);
2859 strftime(date_buf, sizeof(date_buf),
2860 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002861#endif
bellardfaea38e2006-08-05 21:31:00 +00002862 secs = sn->vm_clock_nsec / 1000000000;
2863 snprintf(clock_buf, sizeof(clock_buf),
2864 "%02d:%02d:%02d.%03d",
2865 (int)(secs / 3600),
2866 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002867 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002868 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2869 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002870 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002871 sn->id_str, sn->name,
2872 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2873 date_buf,
2874 clock_buf);
2875 }
2876 return buf;
2877}
2878
bellard83f64092006-08-01 16:21:11 +00002879/**************************************************************/
2880/* async I/Os */
2881
aliguori3b69e4b2009-01-22 16:59:24 +00002882BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002883 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002884 BlockDriverCompletionFunc *cb, void *opaque)
2885{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002886 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2887
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002888 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002889 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002890}
2891
aliguorif141eaf2009-04-07 18:43:24 +00002892BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2893 QEMUIOVector *qiov, int nb_sectors,
2894 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002895{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002896 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2897
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002898 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002899 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002900}
2901
Kevin Wolf40b4f532009-09-09 17:53:37 +02002902
2903typedef struct MultiwriteCB {
2904 int error;
2905 int num_requests;
2906 int num_callbacks;
2907 struct {
2908 BlockDriverCompletionFunc *cb;
2909 void *opaque;
2910 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002911 } callbacks[];
2912} MultiwriteCB;
2913
2914static void multiwrite_user_cb(MultiwriteCB *mcb)
2915{
2916 int i;
2917
2918 for (i = 0; i < mcb->num_callbacks; i++) {
2919 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002920 if (mcb->callbacks[i].free_qiov) {
2921 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2922 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002923 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002924 }
2925}
2926
2927static void multiwrite_cb(void *opaque, int ret)
2928{
2929 MultiwriteCB *mcb = opaque;
2930
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002931 trace_multiwrite_cb(mcb, ret);
2932
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002933 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002934 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002935 }
2936
2937 mcb->num_requests--;
2938 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002939 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002940 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002941 }
2942}
2943
2944static int multiwrite_req_compare(const void *a, const void *b)
2945{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002946 const BlockRequest *req1 = a, *req2 = b;
2947
2948 /*
2949 * Note that we can't simply subtract req2->sector from req1->sector
2950 * here as that could overflow the return value.
2951 */
2952 if (req1->sector > req2->sector) {
2953 return 1;
2954 } else if (req1->sector < req2->sector) {
2955 return -1;
2956 } else {
2957 return 0;
2958 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002959}
2960
2961/*
2962 * Takes a bunch of requests and tries to merge them. Returns the number of
2963 * requests that remain after merging.
2964 */
2965static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2966 int num_reqs, MultiwriteCB *mcb)
2967{
2968 int i, outidx;
2969
2970 // Sort requests by start sector
2971 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2972
2973 // Check if adjacent requests touch the same clusters. If so, combine them,
2974 // filling up gaps with zero sectors.
2975 outidx = 0;
2976 for (i = 1; i < num_reqs; i++) {
2977 int merge = 0;
2978 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2979
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01002980 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02002981 if (reqs[i].sector <= oldreq_last) {
2982 merge = 1;
2983 }
2984
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002985 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2986 merge = 0;
2987 }
2988
Kevin Wolf40b4f532009-09-09 17:53:37 +02002989 if (merge) {
2990 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002991 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002992 qemu_iovec_init(qiov,
2993 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2994
2995 // Add the first request to the merged one. If the requests are
2996 // overlapping, drop the last sectors of the first request.
2997 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2998 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2999
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003000 // We should need to add any zeros between the two requests
3001 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003002
3003 // Add the second request
3004 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
3005
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02003006 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003007 reqs[outidx].qiov = qiov;
3008
3009 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3010 } else {
3011 outidx++;
3012 reqs[outidx].sector = reqs[i].sector;
3013 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3014 reqs[outidx].qiov = reqs[i].qiov;
3015 }
3016 }
3017
3018 return outidx + 1;
3019}
3020
3021/*
3022 * Submit multiple AIO write requests at once.
3023 *
3024 * On success, the function returns 0 and all requests in the reqs array have
3025 * been submitted. In error case this function returns -1, and any of the
3026 * requests may or may not be submitted yet. In particular, this means that the
3027 * callback will be called for some of the requests, for others it won't. The
3028 * caller must check the error field of the BlockRequest to wait for the right
3029 * callbacks (if error != 0, no callback will be called).
3030 *
3031 * The implementation may modify the contents of the reqs array, e.g. to merge
3032 * requests. However, the fields opaque and error are left unmodified as they
3033 * are used to signal failure for a single request to the caller.
3034 */
3035int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3036{
Kevin Wolf40b4f532009-09-09 17:53:37 +02003037 MultiwriteCB *mcb;
3038 int i;
3039
Ryan Harper301db7c2011-03-07 10:01:04 -06003040 /* don't submit writes if we don't have a medium */
3041 if (bs->drv == NULL) {
3042 for (i = 0; i < num_reqs; i++) {
3043 reqs[i].error = -ENOMEDIUM;
3044 }
3045 return -1;
3046 }
3047
Kevin Wolf40b4f532009-09-09 17:53:37 +02003048 if (num_reqs == 0) {
3049 return 0;
3050 }
3051
3052 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05003053 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003054 mcb->num_requests = 0;
3055 mcb->num_callbacks = num_reqs;
3056
3057 for (i = 0; i < num_reqs; i++) {
3058 mcb->callbacks[i].cb = reqs[i].cb;
3059 mcb->callbacks[i].opaque = reqs[i].opaque;
3060 }
3061
3062 // Check for mergable requests
3063 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3064
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003065 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3066
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01003067 /* Run the aio requests. */
3068 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003069 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01003070 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02003071 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003072 }
3073
3074 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003075}
3076
bellard83f64092006-08-01 16:21:11 +00003077void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00003078{
aliguori6bbff9a2009-03-20 18:25:59 +00003079 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00003080}
3081
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08003082/* block I/O throttling */
3083static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3084 bool is_write, double elapsed_time, uint64_t *wait)
3085{
3086 uint64_t bps_limit = 0;
3087 double bytes_limit, bytes_base, bytes_res;
3088 double slice_time, wait_time;
3089
3090 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3091 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3092 } else if (bs->io_limits.bps[is_write]) {
3093 bps_limit = bs->io_limits.bps[is_write];
3094 } else {
3095 if (wait) {
3096 *wait = 0;
3097 }
3098
3099 return false;
3100 }
3101
3102 slice_time = bs->slice_end - bs->slice_start;
3103 slice_time /= (NANOSECONDS_PER_SECOND);
3104 bytes_limit = bps_limit * slice_time;
3105 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3106 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3107 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3108 }
3109
3110 /* bytes_base: the bytes of data which have been read/written; and
3111 * it is obtained from the history statistic info.
3112 * bytes_res: the remaining bytes of data which need to be read/written.
3113 * (bytes_base + bytes_res) / bps_limit: used to calcuate
3114 * the total time for completing reading/writting all data.
3115 */
3116 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3117
3118 if (bytes_base + bytes_res <= bytes_limit) {
3119 if (wait) {
3120 *wait = 0;
3121 }
3122
3123 return false;
3124 }
3125
3126 /* Calc approx time to dispatch */
3127 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3128
3129 /* When the I/O rate at runtime exceeds the limits,
3130 * bs->slice_end need to be extended in order that the current statistic
3131 * info can be kept until the timer fire, so it is increased and tuned
3132 * based on the result of experiment.
3133 */
3134 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3135 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3136 if (wait) {
3137 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3138 }
3139
3140 return true;
3141}
3142
3143static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3144 double elapsed_time, uint64_t *wait)
3145{
3146 uint64_t iops_limit = 0;
3147 double ios_limit, ios_base;
3148 double slice_time, wait_time;
3149
3150 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3151 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3152 } else if (bs->io_limits.iops[is_write]) {
3153 iops_limit = bs->io_limits.iops[is_write];
3154 } else {
3155 if (wait) {
3156 *wait = 0;
3157 }
3158
3159 return false;
3160 }
3161
3162 slice_time = bs->slice_end - bs->slice_start;
3163 slice_time /= (NANOSECONDS_PER_SECOND);
3164 ios_limit = iops_limit * slice_time;
3165 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3166 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3167 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3168 }
3169
3170 if (ios_base + 1 <= ios_limit) {
3171 if (wait) {
3172 *wait = 0;
3173 }
3174
3175 return false;
3176 }
3177
3178 /* Calc approx time to dispatch */
3179 wait_time = (ios_base + 1) / iops_limit;
3180 if (wait_time > elapsed_time) {
3181 wait_time = wait_time - elapsed_time;
3182 } else {
3183 wait_time = 0;
3184 }
3185
3186 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3187 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3188 if (wait) {
3189 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3190 }
3191
3192 return true;
3193}
3194
3195static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3196 bool is_write, int64_t *wait)
3197{
3198 int64_t now, max_wait;
3199 uint64_t bps_wait = 0, iops_wait = 0;
3200 double elapsed_time;
3201 int bps_ret, iops_ret;
3202
3203 now = qemu_get_clock_ns(vm_clock);
3204 if ((bs->slice_start < now)
3205 && (bs->slice_end > now)) {
3206 bs->slice_end = now + bs->slice_time;
3207 } else {
3208 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3209 bs->slice_start = now;
3210 bs->slice_end = now + bs->slice_time;
3211
3212 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3213 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3214
3215 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3216 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3217 }
3218
3219 elapsed_time = now - bs->slice_start;
3220 elapsed_time /= (NANOSECONDS_PER_SECOND);
3221
3222 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3223 is_write, elapsed_time, &bps_wait);
3224 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3225 elapsed_time, &iops_wait);
3226 if (bps_ret || iops_ret) {
3227 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3228 if (wait) {
3229 *wait = max_wait;
3230 }
3231
3232 now = qemu_get_clock_ns(vm_clock);
3233 if (bs->slice_end < now + max_wait) {
3234 bs->slice_end = now + max_wait;
3235 }
3236
3237 return true;
3238 }
3239
3240 if (wait) {
3241 *wait = 0;
3242 }
3243
3244 return false;
3245}
pbrookce1a14d2006-08-07 02:38:06 +00003246
bellard83f64092006-08-01 16:21:11 +00003247/**************************************************************/
3248/* async block device emulation */
3249
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003250typedef struct BlockDriverAIOCBSync {
3251 BlockDriverAIOCB common;
3252 QEMUBH *bh;
3253 int ret;
3254 /* vector translation state */
3255 QEMUIOVector *qiov;
3256 uint8_t *bounce;
3257 int is_write;
3258} BlockDriverAIOCBSync;
3259
3260static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3261{
Kevin Wolfb666d232010-05-05 11:44:39 +02003262 BlockDriverAIOCBSync *acb =
3263 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003264 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003265 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003266 qemu_aio_release(acb);
3267}
3268
3269static AIOPool bdrv_em_aio_pool = {
3270 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3271 .cancel = bdrv_aio_cancel_em,
3272};
3273
bellard83f64092006-08-01 16:21:11 +00003274static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003275{
pbrookce1a14d2006-08-07 02:38:06 +00003276 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003277
aliguorif141eaf2009-04-07 18:43:24 +00003278 if (!acb->is_write)
3279 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003280 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003281 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003282 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003283 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003284 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003285}
bellardbeac80c2006-06-26 20:08:57 +00003286
aliguorif141eaf2009-04-07 18:43:24 +00003287static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3288 int64_t sector_num,
3289 QEMUIOVector *qiov,
3290 int nb_sectors,
3291 BlockDriverCompletionFunc *cb,
3292 void *opaque,
3293 int is_write)
3294
bellardea2384d2004-08-01 21:59:26 +00003295{
pbrookce1a14d2006-08-07 02:38:06 +00003296 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003297
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003298 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003299 acb->is_write = is_write;
3300 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003301 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003302 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003303
3304 if (is_write) {
3305 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003306 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003307 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003308 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003309 }
3310
pbrookce1a14d2006-08-07 02:38:06 +00003311 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003312
pbrookce1a14d2006-08-07 02:38:06 +00003313 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003314}
3315
aliguorif141eaf2009-04-07 18:43:24 +00003316static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3317 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003318 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003319{
aliguorif141eaf2009-04-07 18:43:24 +00003320 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003321}
3322
aliguorif141eaf2009-04-07 18:43:24 +00003323static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3324 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3325 BlockDriverCompletionFunc *cb, void *opaque)
3326{
3327 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3328}
3329
Kevin Wolf68485422011-06-30 10:05:46 +02003330
3331typedef struct BlockDriverAIOCBCoroutine {
3332 BlockDriverAIOCB common;
3333 BlockRequest req;
3334 bool is_write;
3335 QEMUBH* bh;
3336} BlockDriverAIOCBCoroutine;
3337
3338static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3339{
3340 qemu_aio_flush();
3341}
3342
3343static AIOPool bdrv_em_co_aio_pool = {
3344 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3345 .cancel = bdrv_aio_co_cancel_em,
3346};
3347
Paolo Bonzini35246a62011-10-14 10:41:29 +02003348static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003349{
3350 BlockDriverAIOCBCoroutine *acb = opaque;
3351
3352 acb->common.cb(acb->common.opaque, acb->req.error);
3353 qemu_bh_delete(acb->bh);
3354 qemu_aio_release(acb);
3355}
3356
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003357/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3358static void coroutine_fn bdrv_co_do_rw(void *opaque)
3359{
3360 BlockDriverAIOCBCoroutine *acb = opaque;
3361 BlockDriverState *bs = acb->common.bs;
3362
3363 if (!acb->is_write) {
3364 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003365 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003366 } else {
3367 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003368 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003369 }
3370
Paolo Bonzini35246a62011-10-14 10:41:29 +02003371 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003372 qemu_bh_schedule(acb->bh);
3373}
3374
Kevin Wolf68485422011-06-30 10:05:46 +02003375static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3376 int64_t sector_num,
3377 QEMUIOVector *qiov,
3378 int nb_sectors,
3379 BlockDriverCompletionFunc *cb,
3380 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003381 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003382{
3383 Coroutine *co;
3384 BlockDriverAIOCBCoroutine *acb;
3385
3386 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3387 acb->req.sector = sector_num;
3388 acb->req.nb_sectors = nb_sectors;
3389 acb->req.qiov = qiov;
3390 acb->is_write = is_write;
3391
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003392 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003393 qemu_coroutine_enter(co, acb);
3394
3395 return &acb->common;
3396}
3397
Paolo Bonzini07f07612011-10-17 12:32:12 +02003398static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003399{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003400 BlockDriverAIOCBCoroutine *acb = opaque;
3401 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003402
Paolo Bonzini07f07612011-10-17 12:32:12 +02003403 acb->req.error = bdrv_co_flush(bs);
3404 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003405 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003406}
3407
Paolo Bonzini07f07612011-10-17 12:32:12 +02003408BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003409 BlockDriverCompletionFunc *cb, void *opaque)
3410{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003411 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003412
Paolo Bonzini07f07612011-10-17 12:32:12 +02003413 Coroutine *co;
3414 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003415
Paolo Bonzini07f07612011-10-17 12:32:12 +02003416 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3417 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3418 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003419
Alexander Graf016f5cf2010-05-26 17:51:49 +02003420 return &acb->common;
3421}
3422
Paolo Bonzini4265d622011-10-17 12:32:14 +02003423static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3424{
3425 BlockDriverAIOCBCoroutine *acb = opaque;
3426 BlockDriverState *bs = acb->common.bs;
3427
3428 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3429 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3430 qemu_bh_schedule(acb->bh);
3431}
3432
3433BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3434 int64_t sector_num, int nb_sectors,
3435 BlockDriverCompletionFunc *cb, void *opaque)
3436{
3437 Coroutine *co;
3438 BlockDriverAIOCBCoroutine *acb;
3439
3440 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3441
3442 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3443 acb->req.sector = sector_num;
3444 acb->req.nb_sectors = nb_sectors;
3445 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3446 qemu_coroutine_enter(co, acb);
3447
3448 return &acb->common;
3449}
3450
bellardea2384d2004-08-01 21:59:26 +00003451void bdrv_init(void)
3452{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003453 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003454}
pbrookce1a14d2006-08-07 02:38:06 +00003455
Markus Armbrustereb852012009-10-27 18:41:44 +01003456void bdrv_init_with_whitelist(void)
3457{
3458 use_bdrv_whitelist = 1;
3459 bdrv_init();
3460}
3461
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003462void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3463 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003464{
pbrookce1a14d2006-08-07 02:38:06 +00003465 BlockDriverAIOCB *acb;
3466
aliguori6bbff9a2009-03-20 18:25:59 +00003467 if (pool->free_aiocb) {
3468 acb = pool->free_aiocb;
3469 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003470 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003471 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003472 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003473 }
3474 acb->bs = bs;
3475 acb->cb = cb;
3476 acb->opaque = opaque;
3477 return acb;
3478}
3479
3480void qemu_aio_release(void *p)
3481{
aliguori6bbff9a2009-03-20 18:25:59 +00003482 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3483 AIOPool *pool = acb->pool;
3484 acb->next = pool->free_aiocb;
3485 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003486}
bellard19cb3732006-08-19 11:45:59 +00003487
3488/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003489/* Coroutine block device emulation */
3490
3491typedef struct CoroutineIOCompletion {
3492 Coroutine *coroutine;
3493 int ret;
3494} CoroutineIOCompletion;
3495
3496static void bdrv_co_io_em_complete(void *opaque, int ret)
3497{
3498 CoroutineIOCompletion *co = opaque;
3499
3500 co->ret = ret;
3501 qemu_coroutine_enter(co->coroutine, NULL);
3502}
3503
3504static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3505 int nb_sectors, QEMUIOVector *iov,
3506 bool is_write)
3507{
3508 CoroutineIOCompletion co = {
3509 .coroutine = qemu_coroutine_self(),
3510 };
3511 BlockDriverAIOCB *acb;
3512
3513 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003514 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3515 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003516 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003517 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3518 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003519 }
3520
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003521 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003522 if (!acb) {
3523 return -EIO;
3524 }
3525 qemu_coroutine_yield();
3526
3527 return co.ret;
3528}
3529
3530static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3531 int64_t sector_num, int nb_sectors,
3532 QEMUIOVector *iov)
3533{
3534 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3535}
3536
3537static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3538 int64_t sector_num, int nb_sectors,
3539 QEMUIOVector *iov)
3540{
3541 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3542}
3543
Paolo Bonzini07f07612011-10-17 12:32:12 +02003544static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003545{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003546 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003547
Paolo Bonzini07f07612011-10-17 12:32:12 +02003548 rwco->ret = bdrv_co_flush(rwco->bs);
3549}
3550
3551int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3552{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003553 int ret;
3554
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003555 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003556 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003557 }
3558
Kevin Wolfca716362011-11-10 18:13:59 +01003559 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003560 if (bs->drv->bdrv_co_flush_to_os) {
3561 ret = bs->drv->bdrv_co_flush_to_os(bs);
3562 if (ret < 0) {
3563 return ret;
3564 }
3565 }
3566
Kevin Wolfca716362011-11-10 18:13:59 +01003567 /* But don't actually force it to the disk with cache=unsafe */
3568 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3569 return 0;
3570 }
3571
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003572 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003573 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003574 } else if (bs->drv->bdrv_aio_flush) {
3575 BlockDriverAIOCB *acb;
3576 CoroutineIOCompletion co = {
3577 .coroutine = qemu_coroutine_self(),
3578 };
3579
3580 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3581 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003582 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003583 } else {
3584 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003585 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003586 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003587 } else {
3588 /*
3589 * Some block drivers always operate in either writethrough or unsafe
3590 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3591 * know how the server works (because the behaviour is hardcoded or
3592 * depends on server-side configuration), so we can't ensure that
3593 * everything is safe on disk. Returning an error doesn't work because
3594 * that would break guests even if the server operates in writethrough
3595 * mode.
3596 *
3597 * Let's hope the user knows what he's doing.
3598 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003599 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003600 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003601 if (ret < 0) {
3602 return ret;
3603 }
3604
3605 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3606 * in the case of cache=unsafe, so there are no useless flushes.
3607 */
3608 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003609}
3610
Anthony Liguori0f154232011-11-14 15:09:45 -06003611void bdrv_invalidate_cache(BlockDriverState *bs)
3612{
3613 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3614 bs->drv->bdrv_invalidate_cache(bs);
3615 }
3616}
3617
3618void bdrv_invalidate_cache_all(void)
3619{
3620 BlockDriverState *bs;
3621
3622 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3623 bdrv_invalidate_cache(bs);
3624 }
3625}
3626
Paolo Bonzini07f07612011-10-17 12:32:12 +02003627int bdrv_flush(BlockDriverState *bs)
3628{
3629 Coroutine *co;
3630 RwCo rwco = {
3631 .bs = bs,
3632 .ret = NOT_DONE,
3633 };
3634
3635 if (qemu_in_coroutine()) {
3636 /* Fast-path if already in coroutine context */
3637 bdrv_flush_co_entry(&rwco);
3638 } else {
3639 co = qemu_coroutine_create(bdrv_flush_co_entry);
3640 qemu_coroutine_enter(co, &rwco);
3641 while (rwco.ret == NOT_DONE) {
3642 qemu_aio_wait();
3643 }
3644 }
3645
3646 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003647}
3648
Paolo Bonzini4265d622011-10-17 12:32:14 +02003649static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3650{
3651 RwCo *rwco = opaque;
3652
3653 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3654}
3655
3656int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3657 int nb_sectors)
3658{
3659 if (!bs->drv) {
3660 return -ENOMEDIUM;
3661 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3662 return -EIO;
3663 } else if (bs->read_only) {
3664 return -EROFS;
3665 } else if (bs->drv->bdrv_co_discard) {
3666 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3667 } else if (bs->drv->bdrv_aio_discard) {
3668 BlockDriverAIOCB *acb;
3669 CoroutineIOCompletion co = {
3670 .coroutine = qemu_coroutine_self(),
3671 };
3672
3673 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3674 bdrv_co_io_em_complete, &co);
3675 if (acb == NULL) {
3676 return -EIO;
3677 } else {
3678 qemu_coroutine_yield();
3679 return co.ret;
3680 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003681 } else {
3682 return 0;
3683 }
3684}
3685
3686int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3687{
3688 Coroutine *co;
3689 RwCo rwco = {
3690 .bs = bs,
3691 .sector_num = sector_num,
3692 .nb_sectors = nb_sectors,
3693 .ret = NOT_DONE,
3694 };
3695
3696 if (qemu_in_coroutine()) {
3697 /* Fast-path if already in coroutine context */
3698 bdrv_discard_co_entry(&rwco);
3699 } else {
3700 co = qemu_coroutine_create(bdrv_discard_co_entry);
3701 qemu_coroutine_enter(co, &rwco);
3702 while (rwco.ret == NOT_DONE) {
3703 qemu_aio_wait();
3704 }
3705 }
3706
3707 return rwco.ret;
3708}
3709
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003710/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003711/* removable device support */
3712
3713/**
3714 * Return TRUE if the media is present
3715 */
3716int bdrv_is_inserted(BlockDriverState *bs)
3717{
3718 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003719
bellard19cb3732006-08-19 11:45:59 +00003720 if (!drv)
3721 return 0;
3722 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003723 return 1;
3724 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003725}
3726
3727/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003728 * Return whether the media changed since the last call to this
3729 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003730 */
3731int bdrv_media_changed(BlockDriverState *bs)
3732{
3733 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003734
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003735 if (drv && drv->bdrv_media_changed) {
3736 return drv->bdrv_media_changed(bs);
3737 }
3738 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003739}
3740
3741/**
3742 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3743 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02003744void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003745{
3746 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003747
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003748 if (drv && drv->bdrv_eject) {
3749 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003750 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02003751
3752 if (bs->device_name[0] != '\0') {
3753 bdrv_emit_qmp_eject_event(bs, eject_flag);
3754 }
bellard19cb3732006-08-19 11:45:59 +00003755}
3756
bellard19cb3732006-08-19 11:45:59 +00003757/**
3758 * Lock or unlock the media (if it is locked, the user won't be able
3759 * to eject it manually).
3760 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003761void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003762{
3763 BlockDriver *drv = bs->drv;
3764
Markus Armbruster025e8492011-09-06 18:58:47 +02003765 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003766
Markus Armbruster025e8492011-09-06 18:58:47 +02003767 if (drv && drv->bdrv_lock_medium) {
3768 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003769 }
3770}
ths985a03b2007-12-24 16:10:43 +00003771
3772/* needed for generic scsi interface */
3773
3774int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3775{
3776 BlockDriver *drv = bs->drv;
3777
3778 if (drv && drv->bdrv_ioctl)
3779 return drv->bdrv_ioctl(bs, req, buf);
3780 return -ENOTSUP;
3781}
aliguori7d780662009-03-12 19:57:08 +00003782
aliguori221f7152009-03-28 17:28:41 +00003783BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3784 unsigned long int req, void *buf,
3785 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003786{
aliguori221f7152009-03-28 17:28:41 +00003787 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003788
aliguori221f7152009-03-28 17:28:41 +00003789 if (drv && drv->bdrv_aio_ioctl)
3790 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3791 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003792}
aliguorie268ca52009-04-22 20:20:00 +00003793
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003794void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3795{
3796 bs->buffer_alignment = align;
3797}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003798
aliguorie268ca52009-04-22 20:20:00 +00003799void *qemu_blockalign(BlockDriverState *bs, size_t size)
3800{
3801 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3802}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003803
3804void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3805{
3806 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003807
Liran Schouraaa0eb72010-01-26 10:31:48 +02003808 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003809 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003810 if (!bs->dirty_bitmap) {
3811 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3812 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3813 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003814
Anthony Liguori7267c092011-08-20 22:09:37 -05003815 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003816 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003817 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003818 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003819 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003820 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003821 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003822 }
3823}
3824
3825int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3826{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003827 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003828
Jan Kiszkac6d22832009-11-30 18:21:20 +01003829 if (bs->dirty_bitmap &&
3830 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003831 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3832 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003833 } else {
3834 return 0;
3835 }
3836}
3837
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003838void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3839 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003840{
3841 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3842}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003843
3844int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3845{
3846 return bs->dirty_count;
3847}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003848
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003849void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3850{
3851 assert(bs->in_use != in_use);
3852 bs->in_use = in_use;
3853}
3854
3855int bdrv_in_use(BlockDriverState *bs)
3856{
3857 return bs->in_use;
3858}
3859
Luiz Capitulino28a72822011-09-26 17:43:50 -03003860void bdrv_iostatus_enable(BlockDriverState *bs)
3861{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003862 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003863 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003864}
3865
3866/* The I/O status is only enabled if the drive explicitly
3867 * enables it _and_ the VM is configured to stop on errors */
3868bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3869{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003870 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003871 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3872 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3873 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3874}
3875
3876void bdrv_iostatus_disable(BlockDriverState *bs)
3877{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003878 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003879}
3880
3881void bdrv_iostatus_reset(BlockDriverState *bs)
3882{
3883 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003884 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003885 }
3886}
3887
3888/* XXX: Today this is set by device models because it makes the implementation
3889 quite simple. However, the block layer knows about the error, so it's
3890 possible to implement this without device models being involved */
3891void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3892{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003893 if (bdrv_iostatus_is_enabled(bs) &&
3894 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003895 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003896 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3897 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003898 }
3899}
3900
Christoph Hellwiga597e792011-08-25 08:26:01 +02003901void
3902bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3903 enum BlockAcctType type)
3904{
3905 assert(type < BDRV_MAX_IOTYPE);
3906
3907 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003908 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003909 cookie->type = type;
3910}
3911
3912void
3913bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3914{
3915 assert(cookie->type < BDRV_MAX_IOTYPE);
3916
3917 bs->nr_bytes[cookie->type] += cookie->bytes;
3918 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003919 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003920}
3921
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003922int bdrv_img_create(const char *filename, const char *fmt,
3923 const char *base_filename, const char *base_fmt,
3924 char *options, uint64_t img_size, int flags)
3925{
3926 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003927 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003928 BlockDriverState *bs = NULL;
3929 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003930 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003931 int ret = 0;
3932
3933 /* Find driver and parse its options */
3934 drv = bdrv_find_format(fmt);
3935 if (!drv) {
3936 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003937 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003938 goto out;
3939 }
3940
3941 proto_drv = bdrv_find_protocol(filename);
3942 if (!proto_drv) {
3943 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003944 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003945 goto out;
3946 }
3947
3948 create_options = append_option_parameters(create_options,
3949 drv->create_options);
3950 create_options = append_option_parameters(create_options,
3951 proto_drv->create_options);
3952
3953 /* Create parameter list with default values */
3954 param = parse_option_parameters("", create_options, param);
3955
3956 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3957
3958 /* Parse -o options */
3959 if (options) {
3960 param = parse_option_parameters(options, create_options, param);
3961 if (param == NULL) {
3962 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003963 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003964 goto out;
3965 }
3966 }
3967
3968 if (base_filename) {
3969 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3970 base_filename)) {
3971 error_report("Backing file not supported for file format '%s'",
3972 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003973 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003974 goto out;
3975 }
3976 }
3977
3978 if (base_fmt) {
3979 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3980 error_report("Backing file format not supported for file "
3981 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003982 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003983 goto out;
3984 }
3985 }
3986
Jes Sorensen792da932010-12-16 13:52:17 +01003987 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3988 if (backing_file && backing_file->value.s) {
3989 if (!strcmp(filename, backing_file->value.s)) {
3990 error_report("Error: Trying to create an image with the "
3991 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003992 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003993 goto out;
3994 }
3995 }
3996
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003997 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3998 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003999 backing_drv = bdrv_find_format(backing_fmt->value.s);
4000 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004001 error_report("Unknown backing file format '%s'",
4002 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004003 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004004 goto out;
4005 }
4006 }
4007
4008 // The size for the image must always be specified, with one exception:
4009 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02004010 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4011 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004012 if (backing_file && backing_file->value.s) {
4013 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004014 char buf[32];
4015
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004016 bs = bdrv_new("");
4017
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004018 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004019 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004020 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004021 goto out;
4022 }
4023 bdrv_get_geometry(bs, &size);
4024 size *= 512;
4025
4026 snprintf(buf, sizeof(buf), "%" PRId64, size);
4027 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4028 } else {
4029 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004030 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004031 goto out;
4032 }
4033 }
4034
4035 printf("Formatting '%s', fmt=%s ", filename, fmt);
4036 print_option_parameters(param);
4037 puts("");
4038
4039 ret = bdrv_create(drv, filename, param);
4040
4041 if (ret < 0) {
4042 if (ret == -ENOTSUP) {
4043 error_report("Formatting or formatting option not supported for "
4044 "file format '%s'", fmt);
4045 } else if (ret == -EFBIG) {
4046 error_report("The image size is too large for file format '%s'",
4047 fmt);
4048 } else {
4049 error_report("%s: error while creating %s: %s", filename, fmt,
4050 strerror(-ret));
4051 }
4052 }
4053
4054out:
4055 free_option_parameters(create_options);
4056 free_option_parameters(param);
4057
4058 if (bs) {
4059 bdrv_delete(bs);
4060 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01004061
4062 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004063}
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004064
4065void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
4066 BlockDriverCompletionFunc *cb, void *opaque)
4067{
4068 BlockJob *job;
4069
4070 if (bs->job || bdrv_in_use(bs)) {
4071 return NULL;
4072 }
4073 bdrv_set_in_use(bs, 1);
4074
4075 job = g_malloc0(job_type->instance_size);
4076 job->job_type = job_type;
4077 job->bs = bs;
4078 job->cb = cb;
4079 job->opaque = opaque;
4080 bs->job = job;
4081 return job;
4082}
4083
4084void block_job_complete(BlockJob *job, int ret)
4085{
4086 BlockDriverState *bs = job->bs;
4087
4088 assert(bs->job == job);
4089 job->cb(job->opaque, ret);
4090 bs->job = NULL;
4091 g_free(job);
4092 bdrv_set_in_use(bs, 0);
4093}
4094
4095int block_job_set_speed(BlockJob *job, int64_t value)
4096{
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004097 int rc;
4098
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004099 if (!job->job_type->set_speed) {
4100 return -ENOTSUP;
4101 }
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004102 rc = job->job_type->set_speed(job, value);
4103 if (rc == 0) {
4104 job->speed = value;
4105 }
4106 return rc;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004107}
4108
4109void block_job_cancel(BlockJob *job)
4110{
4111 job->cancelled = true;
4112}
4113
4114bool block_job_is_cancelled(BlockJob *job)
4115{
4116 return job->cancelled;
4117}
Paolo Bonzini3e914652012-03-30 13:17:11 +02004118
4119void block_job_cancel_sync(BlockJob *job)
4120{
4121 BlockDriverState *bs = job->bs;
4122
4123 assert(bs->job == job);
4124 block_job_cancel(job);
4125 while (bs->job != NULL && bs->job->busy) {
4126 qemu_aio_wait();
4127 }
4128}