blob: 4b96654e7d6264a32b393a929c47be3edba2c156 [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
/* Sentinel result value used while an emulated sync operation is in progress */
#define NOT_DONE 0x7fffffff

/*
 * Per-request flag bits; this is the type of the 'flags' argument of the
 * bdrv_co_do_readv()/bdrv_co_do_writev() helpers declared in this file.
 */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 1 << 0,
    BDRV_REQ_ZERO_WRITE   = 1 << 1,
} BdrvRequestFlags;
55
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020056static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000057static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000059 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000060static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000062 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020063static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010069static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000070 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010072static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000073 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010081 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010082static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000083
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080084static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
85 bool is_write, double elapsed_time, uint64_t *wait);
86static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
87 double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
89 bool is_write, int64_t *wait);
90
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010091static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000093
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010094static QLIST_HEAD(, BlockDriver) bdrv_drivers =
95 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000096
Markus Armbrusterf9092b12010-06-25 10:33:39 +020097/* The device to use for VM snapshots */
98static BlockDriverState *bs_snapshots;
99
Markus Armbrustereb852012009-10-27 18:41:44 +0100100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000103#ifdef _WIN32
/*
 * Return 1 if filename begins with an ASCII letter immediately followed by
 * ':' (for example "c:" or "D:\\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    int is_letter = (first >= 'a' && first <= 'z') ||
                    (first >= 'A' && first <= 'Z');

    if (!is_letter) {
        /* Do not look at filename[1]: the string may be empty */
        return 0;
    }
    return filename[1] == ':';
}
110
/*
 * Return 1 if filename names a whole Windows drive: either a bare drive
 * letter such as "d:" or a path starting with "\\.\" or "//./".
 * Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* filename[2] is only read once the first two characters are known */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
121#endif
122
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800123/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800124void bdrv_io_limits_disable(BlockDriverState *bs)
125{
126 bs->io_limits_enabled = false;
127
128 while (qemu_co_queue_next(&bs->throttled_reqs));
129
130 if (bs->block_timer) {
131 qemu_del_timer(bs->block_timer);
132 qemu_free_timer(bs->block_timer);
133 bs->block_timer = NULL;
134 }
135
136 bs->slice_start = 0;
137 bs->slice_end = 0;
138 bs->slice_time = 0;
139 memset(&bs->io_base, 0, sizeof(bs->io_base));
140}
141
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800142static void bdrv_block_timer(void *opaque)
143{
144 BlockDriverState *bs = opaque;
145
146 qemu_co_queue_next(&bs->throttled_reqs);
147}
148
149void bdrv_io_limits_enable(BlockDriverState *bs)
150{
151 qemu_co_queue_init(&bs->throttled_reqs);
152 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
153 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
154 bs->slice_start = qemu_get_clock_ns(vm_clock);
155 bs->slice_end = bs->slice_start + bs->slice_time;
156 memset(&bs->io_base, 0, sizeof(bs->io_base));
157 bs->io_limits_enabled = true;
158}
159
160bool bdrv_io_limits_enabled(BlockDriverState *bs)
161{
162 BlockIOLimit *io_limits = &bs->io_limits;
163 return io_limits->bps[BLOCK_IO_LIMIT_READ]
164 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
165 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
166 || io_limits->iops[BLOCK_IO_LIMIT_READ]
167 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
168 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
169}
170
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800171static void bdrv_io_limits_intercept(BlockDriverState *bs,
172 bool is_write, int nb_sectors)
173{
174 int64_t wait_time = -1;
175
176 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
177 qemu_co_queue_wait(&bs->throttled_reqs);
178 }
179
180 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
181 * throttled requests will not be dequeued until the current request is
182 * allowed to be serviced. So if the current request still exceeds the
183 * limits, it will be inserted to the head. All requests followed it will
184 * be still in throttled_reqs queue.
185 */
186
187 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
188 qemu_mod_timer(bs->block_timer,
189 wait_time + qemu_get_clock_ns(vm_clock));
190 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
191 }
192
193 qemu_co_queue_next(&bs->throttled_reqs);
194}
195
/*
 * Check if the path starts with "<protocol>:".
 *
 * The test is simply whether path contains a ':' anywhere. On Windows,
 * drive names and paths with a drive-letter prefix are excluded first.
 * Returns 1 if a protocol is present, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
208
/*
 * Return 1 if path is absolute, 0 otherwise.
 *
 * Everything up to and including the first ':' (if any) is skipped; the path
 * is absolute when the next character is '/' (or '\\' on Windows). On
 * Windows a path that itself begins with '/' or '\\' is always absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    rest = colon ? colon + 1 : path;

#ifdef _WIN32
    if (*rest == '\\') {
        return 1;
    }
#endif
    return *rest == '/';
}
228
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the path of filename interpreted relative to base_path.
 *
 * If filename is absolute it is simply copied. Otherwise the leading part of
 * base_path, up to and including its last path separator or its first ':'
 * (whichever comes later), is copied and filename is appended to it.
 * URLs are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of base_path, if there is one */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    dir_end = (after_sep > after_colon) ? after_sep : after_colon;

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
272
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500273void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000274{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100275 /* Block drivers without coroutine functions need emulation */
276 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200277 bdrv->bdrv_co_readv = bdrv_co_readv_em;
278 bdrv->bdrv_co_writev = bdrv_co_writev_em;
279
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100280 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
281 * the block driver lacks aio we need to emulate that too.
282 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200283 if (!bdrv->bdrv_aio_readv) {
284 /* add AIO emulation layer */
285 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
286 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200287 }
bellard83f64092006-08-01 16:21:11 +0000288 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200289
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100290 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000291}
bellardb3380822004-03-14 21:38:54 +0000292
293/* create a new block device (by default it is empty) */
294BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000295{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100296 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000297
Anthony Liguori7267c092011-08-20 22:09:37 -0500298 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000299 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000300 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100301 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000302 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300303 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000304 return bs;
305}
306
bellardea2384d2004-08-01 21:59:26 +0000307BlockDriver *bdrv_find_format(const char *format_name)
308{
309 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100310 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
311 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000312 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100313 }
bellardea2384d2004-08-01 21:59:26 +0000314 }
315 return NULL;
316}
317
Markus Armbrustereb852012009-10-27 18:41:44 +0100318static int bdrv_is_whitelisted(BlockDriver *drv)
319{
320 static const char *whitelist[] = {
321 CONFIG_BDRV_WHITELIST
322 };
323 const char **p;
324
325 if (!whitelist[0])
326 return 1; /* no whitelist, anything goes */
327
328 for (p = whitelist; *p; p++) {
329 if (!strcmp(drv->format_name, *p)) {
330 return 1;
331 }
332 }
333 return 0;
334}
335
336BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
337{
338 BlockDriver *drv = bdrv_find_format(format_name);
339 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
340}
341
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200342int bdrv_create(BlockDriver *drv, const char* filename,
343 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000344{
345 if (!drv->bdrv_create)
346 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200347
348 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000349}
350
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
352{
353 BlockDriver *drv;
354
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900355 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200356 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000357 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200358 }
359
360 return bdrv_create(drv, filename, options);
361}
362
bellardd5249392004-08-03 21:14:23 +0000363#ifdef _WIN32
bellard95389c82005-12-18 18:28:15 +0000364void get_tmp_filename(char *filename, int size)
bellardd5249392004-08-03 21:14:23 +0000365{
bellard3b9f94e2007-01-07 17:27:07 +0000366 char temp_dir[MAX_PATH];
ths3b46e622007-09-17 08:09:54 +0000367
bellard3b9f94e2007-01-07 17:27:07 +0000368 GetTempPath(MAX_PATH, temp_dir);
369 GetTempFileName(temp_dir, "qem", 0, filename);
bellardd5249392004-08-03 21:14:23 +0000370}
371#else
/*
 * Create a uniquely named temporary file in $TMPDIR (or /tmp when TMPDIR is
 * unset) and store its path in filename, a buffer of 'size' bytes.
 *
 * The file is created and closed straight away; the caller reopens it by
 * name. If the path does not fit in the buffer or the file cannot be
 * created, filename is set to the empty string (when size > 0) rather than
 * being left holding a truncated or unused template.
 */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int len;
    int fd;

    if (size <= 0) {
        return;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        /* a truncated template would name some unrelated path */
        filename[0] = '\0';
        return;
    }

    /* XXX: race condition possible (the file is closed, then reopened by
     * name later) */
    fd = mkstemp(filename);
    if (fd < 0) {
        filename[0] = '\0';
        return;
    }
    close(fd);
}
bellardd5249392004-08-03 21:14:23 +0000384#endif
bellardea2384d2004-08-01 21:59:26 +0000385
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200386/*
387 * Detect host devices. By convention, /dev/cdrom[N] is always
388 * recognized as a host CDROM.
389 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200390static BlockDriver *find_hdev_driver(const char *filename)
391{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200392 int score_max = 0, score;
393 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200394
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100395 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200396 if (d->bdrv_probe_device) {
397 score = d->bdrv_probe_device(filename);
398 if (score > score_max) {
399 score_max = score;
400 drv = d;
401 }
402 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200403 }
404
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200405 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200406}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200407
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900408BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200409{
410 BlockDriver *drv1;
411 char protocol[128];
412 int len;
413 const char *p;
414
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200415 /* TODO Drivers without bdrv_file_open must be specified explicitly */
416
Christoph Hellwig39508e72010-06-23 12:25:17 +0200417 /*
418 * XXX(hch): we really should not let host device detection
419 * override an explicit protocol specification, but moving this
420 * later breaks access to device names with colons in them.
421 * Thanks to the brain-dead persistent naming schemes on udev-
422 * based Linux systems those actually are quite common.
423 */
424 drv1 = find_hdev_driver(filename);
425 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200426 return drv1;
427 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200428
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000429 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200430 return bdrv_find_format("file");
431 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000432 p = strchr(filename, ':');
433 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200434 len = p - filename;
435 if (len > sizeof(protocol) - 1)
436 len = sizeof(protocol) - 1;
437 memcpy(protocol, filename, len);
438 protocol[len] = '\0';
439 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
440 if (drv1->protocol_name &&
441 !strcmp(drv1->protocol_name, protocol)) {
442 return drv1;
443 }
444 }
445 return NULL;
446}
447
Stefan Weilc98ac352010-07-21 21:51:51 +0200448static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000449{
bellard83f64092006-08-01 16:21:11 +0000450 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000451 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000452 uint8_t buf[2048];
453 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000454
Naphtali Spreif5edb012010-01-17 16:48:13 +0200455 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200456 if (ret < 0) {
457 *pdrv = NULL;
458 return ret;
459 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700460
Kevin Wolf08a00552010-06-01 18:37:31 +0200461 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
462 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200464 drv = bdrv_find_format("raw");
465 if (!drv) {
466 ret = -ENOENT;
467 }
468 *pdrv = drv;
469 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700470 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700471
bellard83f64092006-08-01 16:21:11 +0000472 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
473 bdrv_delete(bs);
474 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200475 *pdrv = NULL;
476 return ret;
bellard83f64092006-08-01 16:21:11 +0000477 }
478
bellardea2384d2004-08-01 21:59:26 +0000479 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200480 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100481 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000482 if (drv1->bdrv_probe) {
483 score = drv1->bdrv_probe(buf, ret, filename);
484 if (score > score_max) {
485 score_max = score;
486 drv = drv1;
487 }
bellardea2384d2004-08-01 21:59:26 +0000488 }
489 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200490 if (!drv) {
491 ret = -ENOENT;
492 }
493 *pdrv = drv;
494 return ret;
bellardea2384d2004-08-01 21:59:26 +0000495}
496
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100497/**
498 * Set the current 'total_sectors' value
499 */
500static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
501{
502 BlockDriver *drv = bs->drv;
503
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700504 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
505 if (bs->sg)
506 return 0;
507
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100508 /* query actual device if possible, otherwise just trust the hint */
509 if (drv->bdrv_getlength) {
510 int64_t length = drv->bdrv_getlength(bs);
511 if (length < 0) {
512 return length;
513 }
514 hint = length >> BDRV_SECTOR_BITS;
515 }
516
517 bs->total_sectors = hint;
518 return 0;
519}
520
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100521/**
522 * Set open flags for a given cache mode
523 *
524 * Return 0 on success, -1 if the cache mode was invalid.
525 */
526int bdrv_parse_cache_flags(const char *mode, int *flags)
527{
528 *flags &= ~BDRV_O_CACHE_MASK;
529
530 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
531 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100532 } else if (!strcmp(mode, "directsync")) {
533 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100534 } else if (!strcmp(mode, "writeback")) {
535 *flags |= BDRV_O_CACHE_WB;
536 } else if (!strcmp(mode, "unsafe")) {
537 *flags |= BDRV_O_CACHE_WB;
538 *flags |= BDRV_O_NO_FLUSH;
539 } else if (!strcmp(mode, "writethrough")) {
540 /* this is the default */
541 } else {
542 return -1;
543 }
544
545 return 0;
546}
547
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000548/**
549 * The copy-on-read flag is actually a reference count so multiple users may
550 * use the feature without worrying about clobbering its previous state.
551 * Copy-on-read stays enabled until all users have called to disable it.
552 */
553void bdrv_enable_copy_on_read(BlockDriverState *bs)
554{
555 bs->copy_on_read++;
556}
557
558void bdrv_disable_copy_on_read(BlockDriverState *bs)
559{
560 assert(bs->copy_on_read > 0);
561 bs->copy_on_read--;
562}
563
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200564/*
Kevin Wolf57915332010-04-14 15:24:50 +0200565 * Common part for opening disk images and files
566 */
567static int bdrv_open_common(BlockDriverState *bs, const char *filename,
568 int flags, BlockDriver *drv)
569{
570 int ret, open_flags;
571
572 assert(drv != NULL);
573
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100574 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
575
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200576 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100577 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200578 bs->encrypted = 0;
579 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100580 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200581 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100582 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200583 bs->buffer_alignment = 512;
584
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000585 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
586 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
587 bdrv_enable_copy_on_read(bs);
588 }
589
Kevin Wolf57915332010-04-14 15:24:50 +0200590 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100591 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200592
593 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
594 return -ENOTSUP;
595 }
596
597 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500598 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200599
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100600 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200601
602 /*
603 * Clear flags that are internal to the block layer before opening the
604 * image.
605 */
606 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
607
608 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200609 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200610 */
611 if (bs->is_temporary) {
612 open_flags |= BDRV_O_RDWR;
613 }
614
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100615 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
616
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200617 /* Open the image, either directly or using a protocol */
618 if (drv->bdrv_file_open) {
619 ret = drv->bdrv_file_open(bs, filename, open_flags);
620 } else {
621 ret = bdrv_file_open(&bs->file, filename, open_flags);
622 if (ret >= 0) {
623 ret = drv->bdrv_open(bs, open_flags);
624 }
625 }
626
Kevin Wolf57915332010-04-14 15:24:50 +0200627 if (ret < 0) {
628 goto free_and_fail;
629 }
630
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100631 ret = refresh_total_sectors(bs, bs->total_sectors);
632 if (ret < 0) {
633 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200634 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100635
Kevin Wolf57915332010-04-14 15:24:50 +0200636#ifndef _WIN32
637 if (bs->is_temporary) {
638 unlink(filename);
639 }
640#endif
641 return 0;
642
643free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200644 if (bs->file) {
645 bdrv_delete(bs->file);
646 bs->file = NULL;
647 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500648 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200649 bs->opaque = NULL;
650 bs->drv = NULL;
651 return ret;
652}
653
654/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200655 * Opens a file using a protocol (file, host_device, nbd, ...)
656 */
bellard83f64092006-08-01 16:21:11 +0000657int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000658{
bellard83f64092006-08-01 16:21:11 +0000659 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200660 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000661 int ret;
662
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900663 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200664 if (!drv) {
665 return -ENOENT;
666 }
667
bellard83f64092006-08-01 16:21:11 +0000668 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200669 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000670 if (ret < 0) {
671 bdrv_delete(bs);
672 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000673 }
aliguori71d07702009-03-03 17:37:16 +0000674 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000675 *pbs = bs;
676 return 0;
bellardea2384d2004-08-01 21:59:26 +0000677}
bellardfc01f7e2003-06-30 10:03:06 +0000678
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200679/*
680 * Opens a disk image (raw, qcow2, vmdk, ...)
681 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200682int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
683 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000684{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200685 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200686 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000687
bellard83f64092006-08-01 16:21:11 +0000688 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000689 BlockDriverState *bs1;
690 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000691 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200692 BlockDriver *bdrv_qcow2;
693 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200694 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000695
bellardea2384d2004-08-01 21:59:26 +0000696 /* if snapshot, we create a temporary backing file and open it
697 instead of opening 'filename' directly */
698
699 /* if there is a backing file, use it */
700 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200701 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000702 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000703 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000704 return ret;
bellardea2384d2004-08-01 21:59:26 +0000705 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200706 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000707
708 if (bs1->drv && bs1->drv->protocol_name)
709 is_protocol = 1;
710
bellardea2384d2004-08-01 21:59:26 +0000711 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000712
bellardea2384d2004-08-01 21:59:26 +0000713 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000714
715 /* Real path is meaningless for protocols */
716 if (is_protocol)
717 snprintf(backing_filename, sizeof(backing_filename),
718 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000719 else if (!realpath(filename, backing_filename))
720 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000721
Kevin Wolf91a073a2009-05-27 14:48:06 +0200722 bdrv_qcow2 = bdrv_find_format("qcow2");
723 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
724
Jes Sorensen3e829902010-05-27 16:20:30 +0200725 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200726 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
727 if (drv) {
728 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
729 drv->format_name);
730 }
731
732 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200733 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000734 if (ret < 0) {
735 return ret;
bellardea2384d2004-08-01 21:59:26 +0000736 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200737
bellardea2384d2004-08-01 21:59:26 +0000738 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200739 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000740 bs->is_temporary = 1;
741 }
bellard712e7872005-04-28 21:09:32 +0000742
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200743 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200744 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200745 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000746 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100747
aliguori51d7c002009-03-05 23:00:29 +0000748 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000749 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000750 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751
752 /* Open the image */
753 ret = bdrv_open_common(bs, filename, flags, drv);
754 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100755 goto unlink_and_fail;
756 }
757
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200758 /* If there is a backing file, use it */
759 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
760 char backing_filename[PATH_MAX];
761 int back_flags;
762 BlockDriver *back_drv = NULL;
763
764 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000765
766 if (path_has_protocol(bs->backing_file)) {
767 pstrcpy(backing_filename, sizeof(backing_filename),
768 bs->backing_file);
769 } else {
770 path_combine(backing_filename, sizeof(backing_filename),
771 filename, bs->backing_file);
772 }
773
774 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200775 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000776 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200777
778 /* backing files always opened read-only */
779 back_flags =
780 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
781
782 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
783 if (ret < 0) {
784 bdrv_close(bs);
785 return ret;
786 }
787 if (bs->is_temporary) {
788 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
789 } else {
790 /* base image inherits from "parent" */
791 bs->backing_hd->keep_read_only = bs->keep_read_only;
792 }
793 }
794
795 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200796 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200797 }
798
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800799 /* throttling disk I/O limits */
800 if (bs->io_limits_enabled) {
801 bdrv_io_limits_enable(bs);
802 }
803
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200804 return 0;
805
806unlink_and_fail:
807 if (bs->is_temporary) {
808 unlink(filename);
809 }
810 return ret;
811}
812
bellardfc01f7e2003-06-30 10:03:06 +0000813void bdrv_close(BlockDriverState *bs)
814{
bellard19cb3732006-08-19 11:45:59 +0000815 if (bs->drv) {
Paolo Bonzini3e914652012-03-30 13:17:11 +0200816 if (bs->job) {
817 block_job_cancel_sync(bs->job);
818 }
Kevin Wolf7094f122012-04-11 11:06:37 +0200819 bdrv_drain_all();
820
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200821 if (bs == bs_snapshots) {
822 bs_snapshots = NULL;
823 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100824 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000825 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100826 bs->backing_hd = NULL;
827 }
bellardea2384d2004-08-01 21:59:26 +0000828 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500829 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000830#ifdef _WIN32
831 if (bs->is_temporary) {
832 unlink(bs->filename);
833 }
bellard67b915a2004-03-31 23:37:16 +0000834#endif
bellardea2384d2004-08-01 21:59:26 +0000835 bs->opaque = NULL;
836 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000837 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000838
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200839 if (bs->file != NULL) {
840 bdrv_close(bs->file);
841 }
842
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200843 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000844 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800845
846 /*throttling disk I/O limits*/
847 if (bs->io_limits_enabled) {
848 bdrv_io_limits_disable(bs);
849 }
bellardb3380822004-03-14 21:38:54 +0000850}
851
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900852void bdrv_close_all(void)
853{
854 BlockDriverState *bs;
855
856 QTAILQ_FOREACH(bs, &bdrv_states, list) {
857 bdrv_close(bs);
858 }
859}
860
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000861/*
862 * Wait for pending requests to complete across all BlockDriverStates
863 *
864 * This function does not flush data to disk, use bdrv_flush_all() for that
865 * after calling this function.
866 */
867void bdrv_drain_all(void)
868{
869 BlockDriverState *bs;
870
871 qemu_aio_flush();
872
873 /* If requests are still pending there is a bug somewhere */
874 QTAILQ_FOREACH(bs, &bdrv_states, list) {
875 assert(QLIST_EMPTY(&bs->tracked_requests));
876 assert(qemu_co_queue_empty(&bs->throttled_reqs));
877 }
878}
879
Ryan Harperd22b2f42011-03-29 20:51:47 -0500880/* make a BlockDriverState anonymous by removing from bdrv_state list.
881 Also, NULL terminate the device_name to prevent double remove */
882void bdrv_make_anon(BlockDriverState *bs)
883{
884 if (bs->device_name[0] != '\0') {
885 QTAILQ_REMOVE(&bdrv_states, bs, list);
886 }
887 bs->device_name[0] = '\0';
888}
889
Jeff Cody8802d1f2012-02-28 15:54:06 -0500890/*
891 * Add new bs contents at the top of an image chain while the chain is
892 * live, while keeping required fields on the top layer.
893 *
894 * This will modify the BlockDriverState fields, and swap contents
895 * between bs_new and bs_top. Both bs_new and bs_top are modified.
896 *
Jeff Codyf6801b82012-03-27 16:30:19 -0400897 * bs_new is required to be anonymous.
898 *
Jeff Cody8802d1f2012-02-28 15:54:06 -0500899 * This function does not create any image files.
900 */
901void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
902{
903 BlockDriverState tmp;
904
Jeff Codyf6801b82012-03-27 16:30:19 -0400905 /* bs_new must be anonymous */
906 assert(bs_new->device_name[0] == '\0');
Jeff Cody8802d1f2012-02-28 15:54:06 -0500907
908 tmp = *bs_new;
909
910 /* there are some fields that need to stay on the top layer: */
911
912 /* dev info */
913 tmp.dev_ops = bs_top->dev_ops;
914 tmp.dev_opaque = bs_top->dev_opaque;
915 tmp.dev = bs_top->dev;
916 tmp.buffer_alignment = bs_top->buffer_alignment;
917 tmp.copy_on_read = bs_top->copy_on_read;
918
919 /* i/o timing parameters */
920 tmp.slice_time = bs_top->slice_time;
921 tmp.slice_start = bs_top->slice_start;
922 tmp.slice_end = bs_top->slice_end;
923 tmp.io_limits = bs_top->io_limits;
924 tmp.io_base = bs_top->io_base;
925 tmp.throttled_reqs = bs_top->throttled_reqs;
926 tmp.block_timer = bs_top->block_timer;
927 tmp.io_limits_enabled = bs_top->io_limits_enabled;
928
929 /* geometry */
930 tmp.cyls = bs_top->cyls;
931 tmp.heads = bs_top->heads;
932 tmp.secs = bs_top->secs;
933 tmp.translation = bs_top->translation;
934
935 /* r/w error */
936 tmp.on_read_error = bs_top->on_read_error;
937 tmp.on_write_error = bs_top->on_write_error;
938
939 /* i/o status */
940 tmp.iostatus_enabled = bs_top->iostatus_enabled;
941 tmp.iostatus = bs_top->iostatus;
942
943 /* keep the same entry in bdrv_states */
944 pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
945 tmp.list = bs_top->list;
946
947 /* The contents of 'tmp' will become bs_top, as we are
948 * swapping bs_new and bs_top contents. */
949 tmp.backing_hd = bs_new;
950 pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
Jeff Codyf6801b82012-03-27 16:30:19 -0400951 bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
Jeff Cody8802d1f2012-02-28 15:54:06 -0500952
953 /* swap contents of the fixed new bs and the current top */
954 *bs_new = *bs_top;
955 *bs_top = tmp;
956
Jeff Codyf6801b82012-03-27 16:30:19 -0400957 /* device_name[] was carried over from the old bs_top. bs_new
958 * shouldn't be in bdrv_states, so we need to make device_name[]
959 * reflect the anonymity of bs_new
960 */
961 bs_new->device_name[0] = '\0';
962
Jeff Cody8802d1f2012-02-28 15:54:06 -0500963 /* clear the copied fields in the new backing file */
964 bdrv_detach_dev(bs_new, bs_new->dev);
965
966 qemu_co_queue_init(&bs_new->throttled_reqs);
967 memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
968 memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
969 bdrv_iostatus_disable(bs_new);
970
971 /* we don't use bdrv_io_limits_disable() for this, because we don't want
972 * to affect or delete the block_timer, as it has been moved to bs_top */
973 bs_new->io_limits_enabled = false;
974 bs_new->block_timer = NULL;
975 bs_new->slice_time = 0;
976 bs_new->slice_start = 0;
977 bs_new->slice_end = 0;
978}
979
bellardb3380822004-03-14 21:38:54 +0000980void bdrv_delete(BlockDriverState *bs)
981{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200982 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +0200983 assert(!bs->job);
984 assert(!bs->in_use);
Markus Armbruster18846de2010-06-29 16:58:30 +0200985
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100986 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500987 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000988
bellardb3380822004-03-14 21:38:54 +0000989 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200990 if (bs->file != NULL) {
991 bdrv_delete(bs->file);
992 }
993
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200994 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500995 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000996}
997
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200998int bdrv_attach_dev(BlockDriverState *bs, void *dev)
999/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02001000{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001001 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02001002 return -EBUSY;
1003 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001004 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03001005 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +02001006 return 0;
1007}
1008
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001009/* TODO qdevified devices don't use this, remove when devices are qdevified */
1010void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02001011{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001012 if (bdrv_attach_dev(bs, dev) < 0) {
1013 abort();
1014 }
1015}
1016
1017void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1018/* TODO change to DeviceState *dev when all users are qdevified */
1019{
1020 assert(bs->dev == dev);
1021 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001022 bs->dev_ops = NULL;
1023 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +02001024 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02001025}
1026
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001027/* TODO change to return DeviceState * when all users are qdevified */
1028void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02001029{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001030 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02001031}
1032
Markus Armbruster0e49de52011-08-03 15:07:41 +02001033void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
1034 void *opaque)
1035{
1036 bs->dev_ops = ops;
1037 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001038 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
1039 bs_snapshots = NULL;
1040 }
Markus Armbruster0e49de52011-08-03 15:07:41 +02001041}
1042
Luiz Capitulino329c0a42012-01-25 16:59:43 -02001043void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1044 BlockQMPEventAction action, int is_read)
1045{
1046 QObject *data;
1047 const char *action_str;
1048
1049 switch (action) {
1050 case BDRV_ACTION_REPORT:
1051 action_str = "report";
1052 break;
1053 case BDRV_ACTION_IGNORE:
1054 action_str = "ignore";
1055 break;
1056 case BDRV_ACTION_STOP:
1057 action_str = "stop";
1058 break;
1059 default:
1060 abort();
1061 }
1062
1063 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1064 bdrv->device_name,
1065 action_str,
1066 is_read ? "read" : "write");
1067 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1068
1069 qobject_decref(data);
1070}
1071
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001072static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1073{
1074 QObject *data;
1075
1076 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1077 bdrv_get_device_name(bs), ejected);
1078 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1079
1080 qobject_decref(data);
1081}
1082
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001083static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02001084{
Markus Armbruster145feb12011-08-03 15:07:42 +02001085 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001086 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001087 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001088 if (tray_was_closed) {
1089 /* tray open */
1090 bdrv_emit_qmp_eject_event(bs, true);
1091 }
1092 if (load) {
1093 /* tray close */
1094 bdrv_emit_qmp_eject_event(bs, false);
1095 }
Markus Armbruster145feb12011-08-03 15:07:42 +02001096 }
1097}
1098
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001099bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1100{
1101 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1102}
1103
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01001104void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1105{
1106 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1107 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1108 }
1109}
1110
Markus Armbrustere4def802011-09-06 18:58:53 +02001111bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1112{
1113 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1114 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1115 }
1116 return false;
1117}
1118
Markus Armbruster145feb12011-08-03 15:07:42 +02001119static void bdrv_dev_resize_cb(BlockDriverState *bs)
1120{
1121 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1122 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02001123 }
1124}
1125
Markus Armbrusterf1076392011-09-06 18:58:46 +02001126bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1127{
1128 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1129 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1130 }
1131 return false;
1132}
1133
aliguorie97fc192009-04-21 23:11:50 +00001134/*
1135 * Run consistency checks on an image
1136 *
Kevin Wolfe076f332010-06-29 11:43:13 +02001137 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02001138 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02001139 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00001140 */
Kevin Wolfe076f332010-06-29 11:43:13 +02001141int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +00001142{
1143 if (bs->drv->bdrv_check == NULL) {
1144 return -ENOTSUP;
1145 }
1146
Kevin Wolfe076f332010-06-29 11:43:13 +02001147 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +02001148 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +00001149}
1150
Kevin Wolf8a426612010-07-16 17:17:01 +02001151#define COMMIT_BUF_SECTORS 2048
1152
bellard33e39632003-07-06 17:15:21 +00001153/* commit COW file into the raw image */
1154int bdrv_commit(BlockDriverState *bs)
1155{
bellard19cb3732006-08-19 11:45:59 +00001156 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +02001157 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001158 int64_t sector, total_sectors;
1159 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001160 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001161 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001162 char filename[1024];
1163 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +00001164
bellard19cb3732006-08-19 11:45:59 +00001165 if (!drv)
1166 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001167
1168 if (!bs->backing_hd) {
1169 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001170 }
1171
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001172 if (bs->backing_hd->keep_read_only) {
1173 return -EACCES;
1174 }
Kevin Wolfee181192010-08-05 13:05:22 +02001175
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00001176 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1177 return -EBUSY;
1178 }
1179
Kevin Wolfee181192010-08-05 13:05:22 +02001180 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001181 ro = bs->backing_hd->read_only;
1182 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1183 open_flags = bs->backing_hd->open_flags;
1184
1185 if (ro) {
1186 /* re-open as RW */
1187 bdrv_delete(bs->backing_hd);
1188 bs->backing_hd = NULL;
1189 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001190 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1191 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001192 if (rw_ret < 0) {
1193 bdrv_delete(bs_rw);
1194 /* try to re-open read-only */
1195 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001196 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1197 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001198 if (ret < 0) {
1199 bdrv_delete(bs_ro);
1200 /* drive not functional anymore */
1201 bs->drv = NULL;
1202 return ret;
1203 }
1204 bs->backing_hd = bs_ro;
1205 return rw_ret;
1206 }
1207 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001208 }
bellardea2384d2004-08-01 21:59:26 +00001209
Jan Kiszka6ea44302009-11-30 18:21:19 +01001210 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001211 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001212
Kevin Wolf8a426612010-07-16 17:17:01 +02001213 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001214 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001215
1216 if (bdrv_read(bs, sector, buf, n) != 0) {
1217 ret = -EIO;
1218 goto ro_cleanup;
1219 }
1220
1221 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1222 ret = -EIO;
1223 goto ro_cleanup;
1224 }
bellardea2384d2004-08-01 21:59:26 +00001225 }
1226 }
bellard95389c82005-12-18 18:28:15 +00001227
Christoph Hellwig1d449522010-01-17 12:32:30 +01001228 if (drv->bdrv_make_empty) {
1229 ret = drv->bdrv_make_empty(bs);
1230 bdrv_flush(bs);
1231 }
bellard95389c82005-12-18 18:28:15 +00001232
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001233 /*
1234 * Make sure all data we wrote to the backing device is actually
1235 * stable on disk.
1236 */
1237 if (bs->backing_hd)
1238 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001239
1240ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001241 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001242
1243 if (ro) {
1244 /* re-open as RO */
1245 bdrv_delete(bs->backing_hd);
1246 bs->backing_hd = NULL;
1247 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001248 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1249 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001250 if (ret < 0) {
1251 bdrv_delete(bs_ro);
1252 /* drive not functional anymore */
1253 bs->drv = NULL;
1254 return ret;
1255 }
1256 bs->backing_hd = bs_ro;
1257 bs->backing_hd->keep_read_only = 0;
1258 }
1259
Christoph Hellwig1d449522010-01-17 12:32:30 +01001260 return ret;
bellard33e39632003-07-06 17:15:21 +00001261}
1262
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001263int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001264{
1265 BlockDriverState *bs;
1266
1267 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001268 int ret = bdrv_commit(bs);
1269 if (ret < 0) {
1270 return ret;
1271 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001272 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001273 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001274}
1275
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001276struct BdrvTrackedRequest {
1277 BlockDriverState *bs;
1278 int64_t sector_num;
1279 int nb_sectors;
1280 bool is_write;
1281 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001282 Coroutine *co; /* owner, used for deadlock detection */
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001283 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001284};
1285
1286/**
1287 * Remove an active request from the tracked requests list
1288 *
1289 * This function should be called when a tracked request is completing.
1290 */
1291static void tracked_request_end(BdrvTrackedRequest *req)
1292{
1293 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001294 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001295}
1296
1297/**
1298 * Add an active request to the tracked requests list
1299 */
1300static void tracked_request_begin(BdrvTrackedRequest *req,
1301 BlockDriverState *bs,
1302 int64_t sector_num,
1303 int nb_sectors, bool is_write)
1304{
1305 *req = (BdrvTrackedRequest){
1306 .bs = bs,
1307 .sector_num = sector_num,
1308 .nb_sectors = nb_sectors,
1309 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001310 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001311 };
1312
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001313 qemu_co_queue_init(&req->wait_queue);
1314
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001315 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1316}
1317
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001318/**
1319 * Round a region to cluster boundaries
1320 */
1321static void round_to_clusters(BlockDriverState *bs,
1322 int64_t sector_num, int nb_sectors,
1323 int64_t *cluster_sector_num,
1324 int *cluster_nb_sectors)
1325{
1326 BlockDriverInfo bdi;
1327
1328 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1329 *cluster_sector_num = sector_num;
1330 *cluster_nb_sectors = nb_sectors;
1331 } else {
1332 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1333 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1334 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1335 nb_sectors, c);
1336 }
1337}
1338
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001339static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1340 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001341 /* aaaa bbbb */
1342 if (sector_num >= req->sector_num + req->nb_sectors) {
1343 return false;
1344 }
1345 /* bbbb aaaa */
1346 if (req->sector_num >= sector_num + nb_sectors) {
1347 return false;
1348 }
1349 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001350}
1351
1352static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1353 int64_t sector_num, int nb_sectors)
1354{
1355 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001356 int64_t cluster_sector_num;
1357 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001358 bool retry;
1359
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001360 /* If we touch the same cluster it counts as an overlap. This guarantees
1361 * that allocating writes will be serialized and not race with each other
1362 * for the same cluster. For example, in copy-on-read it ensures that the
1363 * CoR read and write operations are atomic and guest writes cannot
1364 * interleave between them.
1365 */
1366 round_to_clusters(bs, sector_num, nb_sectors,
1367 &cluster_sector_num, &cluster_nb_sectors);
1368
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001369 do {
1370 retry = false;
1371 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001372 if (tracked_request_overlaps(req, cluster_sector_num,
1373 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001374 /* Hitting this means there was a reentrant request, for
1375 * example, a block driver issuing nested requests. This must
1376 * never happen since it means deadlock.
1377 */
1378 assert(qemu_coroutine_self() != req->co);
1379
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001380 qemu_co_queue_wait(&req->wait_queue);
1381 retry = true;
1382 break;
1383 }
1384 }
1385 } while (retry);
1386}
1387
Kevin Wolf756e6732010-01-12 12:55:17 +01001388/*
1389 * Return values:
1390 * 0 - success
1391 * -EINVAL - backing format specified, but no file
1392 * -ENOSPC - can't update the backing file because no space is left in the
1393 * image file header
1394 * -ENOTSUP - format driver doesn't support changing the backing file
1395 */
1396int bdrv_change_backing_file(BlockDriverState *bs,
1397 const char *backing_file, const char *backing_fmt)
1398{
1399 BlockDriver *drv = bs->drv;
1400
1401 if (drv->bdrv_change_backing_file != NULL) {
1402 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1403 } else {
1404 return -ENOTSUP;
1405 }
1406}
1407
aliguori71d07702009-03-03 17:37:16 +00001408static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1409 size_t size)
1410{
1411 int64_t len;
1412
1413 if (!bdrv_is_inserted(bs))
1414 return -ENOMEDIUM;
1415
1416 if (bs->growable)
1417 return 0;
1418
1419 len = bdrv_getlength(bs);
1420
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001421 if (offset < 0)
1422 return -EIO;
1423
1424 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001425 return -EIO;
1426
1427 return 0;
1428}
1429
1430static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1431 int nb_sectors)
1432{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001433 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1434 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001435}
1436
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001437typedef struct RwCo {
1438 BlockDriverState *bs;
1439 int64_t sector_num;
1440 int nb_sectors;
1441 QEMUIOVector *qiov;
1442 bool is_write;
1443 int ret;
1444} RwCo;
1445
1446static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1447{
1448 RwCo *rwco = opaque;
1449
1450 if (!rwco->is_write) {
1451 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001452 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001453 } else {
1454 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001455 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001456 }
1457}
1458
1459/*
1460 * Process a synchronous request using coroutines
1461 */
1462static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1463 int nb_sectors, bool is_write)
1464{
1465 QEMUIOVector qiov;
1466 struct iovec iov = {
1467 .iov_base = (void *)buf,
1468 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1469 };
1470 Coroutine *co;
1471 RwCo rwco = {
1472 .bs = bs,
1473 .sector_num = sector_num,
1474 .nb_sectors = nb_sectors,
1475 .qiov = &qiov,
1476 .is_write = is_write,
1477 .ret = NOT_DONE,
1478 };
1479
1480 qemu_iovec_init_external(&qiov, &iov, 1);
1481
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001482 /**
1483 * In sync call context, when the vcpu is blocked, this throttling timer
1484 * will not fire; so the I/O throttling function has to be disabled here
1485 * if it has been enabled.
1486 */
1487 if (bs->io_limits_enabled) {
1488 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1489 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1490 bdrv_io_limits_disable(bs);
1491 }
1492
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001493 if (qemu_in_coroutine()) {
1494 /* Fast-path if already in coroutine context */
1495 bdrv_rw_co_entry(&rwco);
1496 } else {
1497 co = qemu_coroutine_create(bdrv_rw_co_entry);
1498 qemu_coroutine_enter(co, &rwco);
1499 while (rwco.ret == NOT_DONE) {
1500 qemu_aio_wait();
1501 }
1502 }
1503 return rwco.ret;
1504}
1505
bellard19cb3732006-08-19 11:45:59 +00001506/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001507int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001508 uint8_t *buf, int nb_sectors)
1509{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001510 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001511}
1512
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001513static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001514 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001515{
1516 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001517 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001518
Jan Kiszka6ea44302009-11-30 18:21:19 +01001519 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001520 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001521
1522 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001523 idx = start / (sizeof(unsigned long) * 8);
1524 bit = start % (sizeof(unsigned long) * 8);
1525 val = bs->dirty_bitmap[idx];
1526 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001527 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001528 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001529 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001530 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001531 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001532 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001533 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001534 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001535 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001536 }
1537 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001538 }
1539}
1540
ths5fafdf22007-09-16 21:08:06 +00001541/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001542 -EIO generic I/O error (may happen for all errors)
1543 -ENOMEDIUM No media inserted.
1544 -EINVAL Invalid sector number or nb_sectors
1545 -EACCES Trying to write a read-only device
1546*/
ths5fafdf22007-09-16 21:08:06 +00001547int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001548 const uint8_t *buf, int nb_sectors)
1549{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001550 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001551}
1552
aliguorieda578e2009-03-12 19:57:16 +00001553int bdrv_pread(BlockDriverState *bs, int64_t offset,
1554 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001555{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001556 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001557 int len, nb_sectors, count;
1558 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001559 int ret;
bellard83f64092006-08-01 16:21:11 +00001560
1561 count = count1;
1562 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001563 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001564 if (len > count)
1565 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001566 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001567 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001568 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1569 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001570 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001571 count -= len;
1572 if (count == 0)
1573 return count1;
1574 sector_num++;
1575 buf += len;
1576 }
1577
1578 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001579 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001580 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001581 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1582 return ret;
bellard83f64092006-08-01 16:21:11 +00001583 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001584 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001585 buf += len;
1586 count -= len;
1587 }
1588
1589 /* add data from the last sector */
1590 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001591 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1592 return ret;
bellard83f64092006-08-01 16:21:11 +00001593 memcpy(buf, tmp_buf, count);
1594 }
1595 return count1;
1596}
1597
aliguorieda578e2009-03-12 19:57:16 +00001598int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1599 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001600{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001601 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001602 int len, nb_sectors, count;
1603 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001604 int ret;
bellard83f64092006-08-01 16:21:11 +00001605
1606 count = count1;
1607 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001608 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001609 if (len > count)
1610 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001611 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001612 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001613 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1614 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001615 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001616 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1617 return ret;
bellard83f64092006-08-01 16:21:11 +00001618 count -= len;
1619 if (count == 0)
1620 return count1;
1621 sector_num++;
1622 buf += len;
1623 }
1624
1625 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001626 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001627 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001628 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1629 return ret;
bellard83f64092006-08-01 16:21:11 +00001630 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001631 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001632 buf += len;
1633 count -= len;
1634 }
1635
1636 /* add data from the last sector */
1637 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001638 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1639 return ret;
bellard83f64092006-08-01 16:21:11 +00001640 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001641 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1642 return ret;
bellard83f64092006-08-01 16:21:11 +00001643 }
1644 return count1;
1645}
bellard83f64092006-08-01 16:21:11 +00001646
Kevin Wolff08145f2010-06-16 16:38:15 +02001647/*
1648 * Writes to the file and ensures that no writes are reordered across this
1649 * request (acts as a barrier)
1650 *
1651 * Returns 0 on success, -errno in error cases.
1652 */
1653int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1654 const void *buf, int count)
1655{
1656 int ret;
1657
1658 ret = bdrv_pwrite(bs, offset, buf, count);
1659 if (ret < 0) {
1660 return ret;
1661 }
1662
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001663 /* No flush needed for cache modes that use O_DSYNC */
1664 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001665 bdrv_flush(bs);
1666 }
1667
1668 return 0;
1669}
1670
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001671static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001672 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1673{
1674 /* Perform I/O through a temporary buffer so that users who scribble over
1675 * their read buffer while the operation is in progress do not end up
1676 * modifying the image file. This is critical for zero-copy guest I/O
1677 * where anything might happen inside guest memory.
1678 */
1679 void *bounce_buffer;
1680
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001681 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00001682 struct iovec iov;
1683 QEMUIOVector bounce_qiov;
1684 int64_t cluster_sector_num;
1685 int cluster_nb_sectors;
1686 size_t skip_bytes;
1687 int ret;
1688
1689 /* Cover entire cluster so no additional backing file I/O is required when
1690 * allocating cluster in the image file.
1691 */
1692 round_to_clusters(bs, sector_num, nb_sectors,
1693 &cluster_sector_num, &cluster_nb_sectors);
1694
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001695 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1696 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001697
1698 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1699 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1700 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1701
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001702 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1703 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001704 if (ret < 0) {
1705 goto err;
1706 }
1707
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001708 if (drv->bdrv_co_write_zeroes &&
1709 buffer_is_zero(bounce_buffer, iov.iov_len)) {
1710 ret = drv->bdrv_co_write_zeroes(bs, cluster_sector_num,
1711 cluster_nb_sectors);
1712 } else {
1713 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001714 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00001715 }
1716
Stefan Hajnocziab185922011-11-17 13:40:31 +00001717 if (ret < 0) {
1718 /* It might be okay to ignore write errors for guest requests. If this
1719 * is a deliberate copy-on-read then we don't want to ignore the error.
1720 * Simply report it in all cases.
1721 */
1722 goto err;
1723 }
1724
1725 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1726 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1727 nb_sectors * BDRV_SECTOR_SIZE);
1728
1729err:
1730 qemu_vfree(bounce_buffer);
1731 return ret;
1732}
1733
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001734/*
1735 * Handle a read request in coroutine context
1736 */
1737static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001738 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1739 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001740{
1741 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001742 BdrvTrackedRequest req;
1743 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001744
Kevin Wolfda1fa912011-07-14 17:27:13 +02001745 if (!drv) {
1746 return -ENOMEDIUM;
1747 }
1748 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1749 return -EIO;
1750 }
1751
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001752 /* throttling disk read I/O */
1753 if (bs->io_limits_enabled) {
1754 bdrv_io_limits_intercept(bs, false, nb_sectors);
1755 }
1756
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001757 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001758 flags |= BDRV_REQ_COPY_ON_READ;
1759 }
1760 if (flags & BDRV_REQ_COPY_ON_READ) {
1761 bs->copy_on_read_in_flight++;
1762 }
1763
1764 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001765 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1766 }
1767
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001768 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001769
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001770 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00001771 int pnum;
1772
1773 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1774 if (ret < 0) {
1775 goto out;
1776 }
1777
1778 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001779 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001780 goto out;
1781 }
1782 }
1783
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001784 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001785
1786out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001787 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001788
1789 if (flags & BDRV_REQ_COPY_ON_READ) {
1790 bs->copy_on_read_in_flight--;
1791 }
1792
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001793 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001794}
1795
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001796int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001797 int nb_sectors, QEMUIOVector *qiov)
1798{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001799 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001800
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001801 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1802}
1803
1804int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1805 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1806{
1807 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1808
1809 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1810 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001811}
1812
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001813static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1814 int64_t sector_num, int nb_sectors)
1815{
1816 BlockDriver *drv = bs->drv;
1817 QEMUIOVector qiov;
1818 struct iovec iov;
1819 int ret;
1820
1821 /* First try the efficient write zeroes operation */
1822 if (drv->bdrv_co_write_zeroes) {
1823 return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1824 }
1825
1826 /* Fall back to bounce buffer if write zeroes is unsupported */
1827 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1828 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1829 memset(iov.iov_base, 0, iov.iov_len);
1830 qemu_iovec_init_external(&qiov, &iov, 1);
1831
1832 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1833
1834 qemu_vfree(iov.iov_base);
1835 return ret;
1836}
1837
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001838/*
1839 * Handle a write request in coroutine context
1840 */
1841static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001842 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1843 BdrvRequestFlags flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001844{
1845 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001846 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001847 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001848
1849 if (!bs->drv) {
1850 return -ENOMEDIUM;
1851 }
1852 if (bs->read_only) {
1853 return -EACCES;
1854 }
1855 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1856 return -EIO;
1857 }
1858
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001859 /* throttling disk write I/O */
1860 if (bs->io_limits_enabled) {
1861 bdrv_io_limits_intercept(bs, true, nb_sectors);
1862 }
1863
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001864 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001865 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1866 }
1867
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001868 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1869
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001870 if (flags & BDRV_REQ_ZERO_WRITE) {
1871 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1872 } else {
1873 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1874 }
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001875
Kevin Wolfda1fa912011-07-14 17:27:13 +02001876 if (bs->dirty_bitmap) {
1877 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1878 }
1879
1880 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1881 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1882 }
1883
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001884 tracked_request_end(&req);
1885
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001886 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001887}
1888
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001889int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1890 int nb_sectors, QEMUIOVector *qiov)
1891{
1892 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1893
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001894 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
1895}
1896
1897int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
1898 int64_t sector_num, int nb_sectors)
1899{
1900 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1901
1902 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
1903 BDRV_REQ_ZERO_WRITE);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001904}
1905
bellard83f64092006-08-01 16:21:11 +00001906/**
bellard83f64092006-08-01 16:21:11 +00001907 * Truncate file to 'offset' bytes (needed only for file protocols)
1908 */
1909int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1910{
1911 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001912 int ret;
bellard83f64092006-08-01 16:21:11 +00001913 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001914 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001915 if (!drv->bdrv_truncate)
1916 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001917 if (bs->read_only)
1918 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001919 if (bdrv_in_use(bs))
1920 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001921 ret = drv->bdrv_truncate(bs, offset);
1922 if (ret == 0) {
1923 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001924 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001925 }
1926 return ret;
bellard83f64092006-08-01 16:21:11 +00001927}
1928
1929/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001930 * Length of a allocated file in bytes. Sparse files are counted by actual
1931 * allocated space. Return < 0 if error or unknown.
1932 */
1933int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1934{
1935 BlockDriver *drv = bs->drv;
1936 if (!drv) {
1937 return -ENOMEDIUM;
1938 }
1939 if (drv->bdrv_get_allocated_file_size) {
1940 return drv->bdrv_get_allocated_file_size(bs);
1941 }
1942 if (bs->file) {
1943 return bdrv_get_allocated_file_size(bs->file);
1944 }
1945 return -ENOTSUP;
1946}
1947
1948/**
bellard83f64092006-08-01 16:21:11 +00001949 * Length of a file in bytes. Return < 0 if error or unknown.
1950 */
1951int64_t bdrv_getlength(BlockDriverState *bs)
1952{
1953 BlockDriver *drv = bs->drv;
1954 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001955 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001956
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001957 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001958 if (drv->bdrv_getlength) {
1959 return drv->bdrv_getlength(bs);
1960 }
bellard83f64092006-08-01 16:21:11 +00001961 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001962 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001963}
1964
bellard19cb3732006-08-19 11:45:59 +00001965/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001966void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001967{
bellard19cb3732006-08-19 11:45:59 +00001968 int64_t length;
1969 length = bdrv_getlength(bs);
1970 if (length < 0)
1971 length = 0;
1972 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001973 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001974 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001975}
bellardcf989512004-02-16 21:56:36 +00001976
/*
 * One entry of the MSDOS partition table as it is laid out on disk.
 * guess_disk_lchs() overlays four of these on the first sector, starting at
 * byte offset 0x1be; the struct is packed so the overlay matches byte for
 * byte.  nr_sects is converted with le32_to_cpu() when it is read there.
 */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001989
1990/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1991static int guess_disk_lchs(BlockDriverState *bs,
1992 int *pcylinders, int *pheads, int *psectors)
1993{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001994 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001995 int ret, i, heads, sectors, cylinders;
1996 struct partition *p;
1997 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001998 uint64_t nb_sectors;
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001999 bool enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00002000
2001 bdrv_get_geometry(bs, &nb_sectors);
2002
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002003 /**
2004 * The function will be invoked during startup not only in sync I/O mode,
2005 * but also in async I/O mode. So the I/O throttling function has to
2006 * be disabled temporarily here, not permanently.
2007 */
2008 enabled = bs->io_limits_enabled;
2009 bs->io_limits_enabled = false;
aliguorif3d54fc2008-11-25 21:50:24 +00002010 ret = bdrv_read(bs, 0, buf, 1);
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002011 bs->io_limits_enabled = enabled;
aliguorif3d54fc2008-11-25 21:50:24 +00002012 if (ret < 0)
2013 return -1;
2014 /* test msdos magic */
2015 if (buf[510] != 0x55 || buf[511] != 0xaa)
2016 return -1;
2017 for(i = 0; i < 4; i++) {
2018 p = ((struct partition *)(buf + 0x1be)) + i;
2019 nr_sects = le32_to_cpu(p->nr_sects);
2020 if (nr_sects && p->end_head) {
2021 /* We make the assumption that the partition terminates on
2022 a cylinder boundary */
2023 heads = p->end_head + 1;
2024 sectors = p->end_sector & 63;
2025 if (sectors == 0)
2026 continue;
2027 cylinders = nb_sectors / (heads * sectors);
2028 if (cylinders < 1 || cylinders > 16383)
2029 continue;
2030 *pheads = heads;
2031 *psectors = sectors;
2032 *pcylinders = cylinders;
2033#if 0
2034 printf("guessed geometry: LCHS=%d %d %d\n",
2035 cylinders, heads, sectors);
2036#endif
2037 return 0;
2038 }
2039 }
2040 return -1;
2041}
2042
2043void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2044{
2045 int translation, lba_detected = 0;
2046 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00002047 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00002048
2049 /* if a geometry hint is available, use it */
2050 bdrv_get_geometry(bs, &nb_sectors);
2051 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2052 translation = bdrv_get_translation_hint(bs);
2053 if (cylinders != 0) {
2054 *pcyls = cylinders;
2055 *pheads = heads;
2056 *psecs = secs;
2057 } else {
2058 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2059 if (heads > 16) {
2060 /* if heads > 16, it means that a BIOS LBA
2061 translation was active, so the default
2062 hardware geometry is OK */
2063 lba_detected = 1;
2064 goto default_geometry;
2065 } else {
2066 *pcyls = cylinders;
2067 *pheads = heads;
2068 *psecs = secs;
2069 /* disable any translation to be in sync with
2070 the logical geometry */
2071 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2072 bdrv_set_translation_hint(bs,
2073 BIOS_ATA_TRANSLATION_NONE);
2074 }
2075 }
2076 } else {
2077 default_geometry:
2078 /* if no geometry, use a standard physical disk geometry */
2079 cylinders = nb_sectors / (16 * 63);
2080
2081 if (cylinders > 16383)
2082 cylinders = 16383;
2083 else if (cylinders < 2)
2084 cylinders = 2;
2085 *pcyls = cylinders;
2086 *pheads = 16;
2087 *psecs = 63;
2088 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2089 if ((*pcyls * *pheads) <= 131072) {
2090 bdrv_set_translation_hint(bs,
2091 BIOS_ATA_TRANSLATION_LARGE);
2092 } else {
2093 bdrv_set_translation_hint(bs,
2094 BIOS_ATA_TRANSLATION_LBA);
2095 }
2096 }
2097 }
2098 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2099 }
2100}
2101
ths5fafdf22007-09-16 21:08:06 +00002102void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002103 int cyls, int heads, int secs)
2104{
2105 bs->cyls = cyls;
2106 bs->heads = heads;
2107 bs->secs = secs;
2108}
2109
bellard46d47672004-11-16 01:45:27 +00002110void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2111{
2112 bs->translation = translation;
2113}
2114
ths5fafdf22007-09-16 21:08:06 +00002115void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00002116 int *pcyls, int *pheads, int *psecs)
2117{
2118 *pcyls = bs->cyls;
2119 *pheads = bs->heads;
2120 *psecs = bs->secs;
2121}
2122
Zhi Yong Wu0563e192011-11-03 16:57:25 +08002123/* throttling disk io limits */
2124void bdrv_set_io_limits(BlockDriverState *bs,
2125 BlockIOLimit *io_limits)
2126{
2127 bs->io_limits = *io_limits;
2128 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2129}
2130
/* Recognize floppy formats */
/*
 * One known floppy geometry.  bdrv_get_floppy_geometry_hint() computes a
 * format's size in sectors as (max_head + 1) * max_track * last_sect and
 * reports max_head + 1 as the number of heads.
 */
typedef struct FDFormat {
    FDriveType drive;   /* drive type this format belongs to */
    uint8_t last_sect;  /* reported to callers as *last_sect */
    uint8_t max_track;  /* reported to callers as *max_track */
    uint8_t max_head;   /* number of heads minus one */
    FDriveRate rate;    /* reported to callers as *rate */
} FDFormat;
2139
/*
 * Table of known floppy formats, scanned in order by
 * bdrv_get_floppy_geometry_hint().  Each row is
 * { drive, last_sect, max_track, max_head, rate }.  The entry whose drive is
 * FDRIVE_DRV_NONE terminates the table.
 */
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
    { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
    { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
    { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
};
2186
2187void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2188 int *max_track, int *last_sect,
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002189 FDriveType drive_in, FDriveType *drive,
2190 FDriveRate *rate)
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002191{
2192 const FDFormat *parse;
2193 uint64_t nb_sectors, size;
2194 int i, first_match, match;
2195
2196 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2197 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2198 /* User defined disk */
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002199 *rate = FDRIVE_RATE_500K;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002200 } else {
2201 bdrv_get_geometry(bs, &nb_sectors);
2202 match = -1;
2203 first_match = -1;
2204 for (i = 0; ; i++) {
2205 parse = &fd_formats[i];
2206 if (parse->drive == FDRIVE_DRV_NONE) {
2207 break;
2208 }
2209 if (drive_in == parse->drive ||
2210 drive_in == FDRIVE_DRV_NONE) {
2211 size = (parse->max_head + 1) * parse->max_track *
2212 parse->last_sect;
2213 if (nb_sectors == size) {
2214 match = i;
2215 break;
2216 }
2217 if (first_match == -1) {
2218 first_match = i;
2219 }
2220 }
2221 }
2222 if (match == -1) {
2223 if (first_match == -1) {
2224 match = 1;
2225 } else {
2226 match = first_match;
2227 }
2228 parse = &fd_formats[match];
2229 }
2230 *nb_heads = parse->max_head + 1;
2231 *max_track = parse->max_track;
2232 *last_sect = parse->last_sect;
2233 *drive = parse->drive;
Hervé Poussineauf8d3d122012-02-06 22:29:07 +01002234 *rate = parse->rate;
Blue Swirl5bbdbb42011-02-12 20:43:32 +00002235 }
2236}
2237
bellard46d47672004-11-16 01:45:27 +00002238int bdrv_get_translation_hint(BlockDriverState *bs)
2239{
2240 return bs->translation;
2241}
2242
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002243void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2244 BlockErrorAction on_write_error)
2245{
2246 bs->on_read_error = on_read_error;
2247 bs->on_write_error = on_write_error;
2248}
2249
2250BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2251{
2252 return is_read ? bs->on_read_error : bs->on_write_error;
2253}
2254
bellardb3380822004-03-14 21:38:54 +00002255int bdrv_is_read_only(BlockDriverState *bs)
2256{
2257 return bs->read_only;
2258}
2259
ths985a03b2007-12-24 16:10:43 +00002260int bdrv_is_sg(BlockDriverState *bs)
2261{
2262 return bs->sg;
2263}
2264
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002265int bdrv_enable_write_cache(BlockDriverState *bs)
2266{
2267 return bs->enable_write_cache;
2268}
2269
bellardea2384d2004-08-01 21:59:26 +00002270int bdrv_is_encrypted(BlockDriverState *bs)
2271{
2272 if (bs->backing_hd && bs->backing_hd->encrypted)
2273 return 1;
2274 return bs->encrypted;
2275}
2276
aliguoric0f4ce72009-03-05 23:01:01 +00002277int bdrv_key_required(BlockDriverState *bs)
2278{
2279 BlockDriverState *backing_hd = bs->backing_hd;
2280
2281 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2282 return 1;
2283 return (bs->encrypted && !bs->valid_key);
2284}
2285
bellardea2384d2004-08-01 21:59:26 +00002286int bdrv_set_key(BlockDriverState *bs, const char *key)
2287{
2288 int ret;
2289 if (bs->backing_hd && bs->backing_hd->encrypted) {
2290 ret = bdrv_set_key(bs->backing_hd, key);
2291 if (ret < 0)
2292 return ret;
2293 if (!bs->encrypted)
2294 return 0;
2295 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002296 if (!bs->encrypted) {
2297 return -EINVAL;
2298 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2299 return -ENOMEDIUM;
2300 }
aliguoric0f4ce72009-03-05 23:01:01 +00002301 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002302 if (ret < 0) {
2303 bs->valid_key = 0;
2304 } else if (!bs->valid_key) {
2305 bs->valid_key = 1;
2306 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002307 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002308 }
aliguoric0f4ce72009-03-05 23:01:01 +00002309 return ret;
bellardea2384d2004-08-01 21:59:26 +00002310}
2311
2312void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2313{
bellard19cb3732006-08-19 11:45:59 +00002314 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002315 buf[0] = '\0';
2316 } else {
2317 pstrcpy(buf, buf_size, bs->drv->format_name);
2318 }
2319}
2320
ths5fafdf22007-09-16 21:08:06 +00002321void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002322 void *opaque)
2323{
2324 BlockDriver *drv;
2325
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002326 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002327 it(opaque, drv->format_name);
2328 }
2329}
2330
bellardb3380822004-03-14 21:38:54 +00002331BlockDriverState *bdrv_find(const char *name)
2332{
2333 BlockDriverState *bs;
2334
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002335 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2336 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002337 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002338 }
bellardb3380822004-03-14 21:38:54 +00002339 }
2340 return NULL;
2341}
2342
Markus Armbruster2f399b02010-06-02 18:55:20 +02002343BlockDriverState *bdrv_next(BlockDriverState *bs)
2344{
2345 if (!bs) {
2346 return QTAILQ_FIRST(&bdrv_states);
2347 }
2348 return QTAILQ_NEXT(bs, list);
2349}
2350
aliguori51de9762009-03-05 23:00:43 +00002351void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002352{
2353 BlockDriverState *bs;
2354
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002355 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002356 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002357 }
2358}
2359
bellardea2384d2004-08-01 21:59:26 +00002360const char *bdrv_get_device_name(BlockDriverState *bs)
2361{
2362 return bs->device_name;
2363}
2364
aliguoric6ca28d2008-10-06 13:55:43 +00002365void bdrv_flush_all(void)
2366{
2367 BlockDriverState *bs;
2368
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002369 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01002370 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002371 }
aliguoric6ca28d2008-10-06 13:55:43 +00002372}
2373
Kevin Wolff2feebb2010-04-14 17:30:35 +02002374int bdrv_has_zero_init(BlockDriverState *bs)
2375{
2376 assert(bs->drv);
2377
Kevin Wolf336c1c12010-07-28 11:26:29 +02002378 if (bs->drv->bdrv_has_zero_init) {
2379 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002380 }
2381
2382 return 1;
2383}
2384
/*
 * Arguments and result slot for bdrv_is_allocated_co_entry(), which passes
 * bs, sector_num, nb_sectors and pnum on to bdrv_co_is_allocated().
 */
typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;
    int ret;    /* return value of bdrv_co_is_allocated() */
    bool done;  /* set to true once 'ret' has been stored */
} BdrvCoIsAllocatedData;
2393
thsf58c7b32008-06-05 21:53:49 +00002394/*
2395 * Returns true iff the specified sector is present in the disk image. Drivers
2396 * not implementing the functionality are assumed to not support backing files,
2397 * hence all their sectors are reported as allocated.
2398 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002399 * If 'sector_num' is beyond the end of the disk image the return value is 0
2400 * and 'pnum' is set to 0.
2401 *
thsf58c7b32008-06-05 21:53:49 +00002402 * 'pnum' is set to the number of sectors (including and immediately following
2403 * the specified sector) that are known to be in the same
2404 * allocated/unallocated state.
2405 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002406 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2407 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002408 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002409int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2410 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002411{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002412 int64_t n;
2413
2414 if (sector_num >= bs->total_sectors) {
2415 *pnum = 0;
2416 return 0;
2417 }
2418
2419 n = bs->total_sectors - sector_num;
2420 if (n < nb_sectors) {
2421 nb_sectors = n;
2422 }
2423
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002424 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002425 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002426 return 1;
2427 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002428
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002429 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2430}
2431
2432/* Coroutine wrapper for bdrv_is_allocated() */
2433static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2434{
2435 BdrvCoIsAllocatedData *data = opaque;
2436 BlockDriverState *bs = data->bs;
2437
2438 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2439 data->pnum);
2440 data->done = true;
2441}
2442
2443/*
2444 * Synchronous wrapper around bdrv_co_is_allocated().
2445 *
2446 * See bdrv_co_is_allocated() for details.
2447 */
2448int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2449 int *pnum)
2450{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002451 Coroutine *co;
2452 BdrvCoIsAllocatedData data = {
2453 .bs = bs,
2454 .sector_num = sector_num,
2455 .nb_sectors = nb_sectors,
2456 .pnum = pnum,
2457 .done = false,
2458 };
2459
2460 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2461 qemu_coroutine_enter(co, &data);
2462 while (!data.done) {
2463 qemu_aio_wait();
2464 }
2465 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002466}
2467
Luiz Capitulinob2023812011-09-21 17:16:47 -03002468BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002469{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002470 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002471 BlockDriverState *bs;
2472
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002473 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002474 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002475
Luiz Capitulinob2023812011-09-21 17:16:47 -03002476 info->value = g_malloc0(sizeof(*info->value));
2477 info->value->device = g_strdup(bs->device_name);
2478 info->value->type = g_strdup("unknown");
2479 info->value->locked = bdrv_dev_is_medium_locked(bs);
2480 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002481
Markus Armbrustere4def802011-09-06 18:58:53 +02002482 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002483 info->value->has_tray_open = true;
2484 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002485 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002486
2487 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002488 info->value->has_io_status = true;
2489 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002490 }
2491
bellard19cb3732006-08-19 11:45:59 +00002492 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002493 info->value->has_inserted = true;
2494 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2495 info->value->inserted->file = g_strdup(bs->filename);
2496 info->value->inserted->ro = bs->read_only;
2497 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2498 info->value->inserted->encrypted = bs->encrypted;
2499 if (bs->backing_file[0]) {
2500 info->value->inserted->has_backing_file = true;
2501 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002502 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002503
2504 if (bs->io_limits_enabled) {
2505 info->value->inserted->bps =
2506 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2507 info->value->inserted->bps_rd =
2508 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2509 info->value->inserted->bps_wr =
2510 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2511 info->value->inserted->iops =
2512 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2513 info->value->inserted->iops_rd =
2514 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2515 info->value->inserted->iops_wr =
2516 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2517 }
bellardb3380822004-03-14 21:38:54 +00002518 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002519
2520 /* XXX: waiting for the qapi to support GSList */
2521 if (!cur_item) {
2522 head = cur_item = info;
2523 } else {
2524 cur_item->next = info;
2525 cur_item = info;
2526 }
bellardb3380822004-03-14 21:38:54 +00002527 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002528
Luiz Capitulinob2023812011-09-21 17:16:47 -03002529 return head;
bellardb3380822004-03-14 21:38:54 +00002530}
thsa36e69d2007-12-02 05:18:19 +00002531
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002532/* Consider exposing this as a full fledged QMP command */
2533static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002534{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002535 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002536
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002537 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002538
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002539 if (bs->device_name[0]) {
2540 s->has_device = true;
2541 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002542 }
2543
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002544 s->stats = g_malloc0(sizeof(*s->stats));
2545 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2546 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2547 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2548 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2549 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2550 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2551 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2552 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2553 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2554
Kevin Wolf294cc352010-04-28 14:34:01 +02002555 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002556 s->has_parent = true;
2557 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002558 }
2559
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002560 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002561}
2562
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002563BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002564{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002565 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002566 BlockDriverState *bs;
2567
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002568 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002569 BlockStatsList *info = g_malloc0(sizeof(*info));
2570 info->value = qmp_query_blockstat(bs, NULL);
2571
2572 /* XXX: waiting for the qapi to support GSList */
2573 if (!cur_item) {
2574 head = cur_item = info;
2575 } else {
2576 cur_item->next = info;
2577 cur_item = info;
2578 }
thsa36e69d2007-12-02 05:18:19 +00002579 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002580
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002581 return head;
thsa36e69d2007-12-02 05:18:19 +00002582}
bellardea2384d2004-08-01 21:59:26 +00002583
aliguori045df332009-03-05 23:00:48 +00002584const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2585{
2586 if (bs->backing_hd && bs->backing_hd->encrypted)
2587 return bs->backing_file;
2588 else if (bs->encrypted)
2589 return bs->filename;
2590 else
2591 return NULL;
2592}
2593
ths5fafdf22007-09-16 21:08:06 +00002594void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002595 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002596{
Kevin Wolf3574c602011-10-26 11:02:11 +02002597 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002598}
2599
ths5fafdf22007-09-16 21:08:06 +00002600int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002601 const uint8_t *buf, int nb_sectors)
2602{
2603 BlockDriver *drv = bs->drv;
2604 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002605 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002606 if (!drv->bdrv_write_compressed)
2607 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002608 if (bdrv_check_request(bs, sector_num, nb_sectors))
2609 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002610
Jan Kiszkac6d22832009-11-30 18:21:20 +01002611 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002612 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2613 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002614
bellardfaea38e2006-08-05 21:31:00 +00002615 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2616}
ths3b46e622007-09-17 08:09:54 +00002617
bellardfaea38e2006-08-05 21:31:00 +00002618int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2619{
2620 BlockDriver *drv = bs->drv;
2621 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002622 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002623 if (!drv->bdrv_get_info)
2624 return -ENOTSUP;
2625 memset(bdi, 0, sizeof(*bdi));
2626 return drv->bdrv_get_info(bs, bdi);
2627}
2628
Christoph Hellwig45566e92009-07-10 23:11:57 +02002629int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2630 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002631{
2632 BlockDriver *drv = bs->drv;
2633 if (!drv)
2634 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002635 if (drv->bdrv_save_vmstate)
2636 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2637 if (bs->file)
2638 return bdrv_save_vmstate(bs->file, buf, pos, size);
2639 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002640}
2641
Christoph Hellwig45566e92009-07-10 23:11:57 +02002642int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2643 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002644{
2645 BlockDriver *drv = bs->drv;
2646 if (!drv)
2647 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002648 if (drv->bdrv_load_vmstate)
2649 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2650 if (bs->file)
2651 return bdrv_load_vmstate(bs->file, buf, pos, size);
2652 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002653}
2654
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002655void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2656{
2657 BlockDriver *drv = bs->drv;
2658
2659 if (!drv || !drv->bdrv_debug_event) {
2660 return;
2661 }
2662
2663 return drv->bdrv_debug_event(bs, event);
2664
2665}
2666
bellardfaea38e2006-08-05 21:31:00 +00002667/**************************************************************/
2668/* handling of snapshots */
2669
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002670int bdrv_can_snapshot(BlockDriverState *bs)
2671{
2672 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002673 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002674 return 0;
2675 }
2676
2677 if (!drv->bdrv_snapshot_create) {
2678 if (bs->file != NULL) {
2679 return bdrv_can_snapshot(bs->file);
2680 }
2681 return 0;
2682 }
2683
2684 return 1;
2685}
2686
Blue Swirl199630b2010-07-25 20:49:34 +00002687int bdrv_is_snapshot(BlockDriverState *bs)
2688{
2689 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2690}
2691
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002692BlockDriverState *bdrv_snapshots(void)
2693{
2694 BlockDriverState *bs;
2695
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002696 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002697 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002698 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002699
2700 bs = NULL;
2701 while ((bs = bdrv_next(bs))) {
2702 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002703 bs_snapshots = bs;
2704 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002705 }
2706 }
2707 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002708}
2709
ths5fafdf22007-09-16 21:08:06 +00002710int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002711 QEMUSnapshotInfo *sn_info)
2712{
2713 BlockDriver *drv = bs->drv;
2714 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002715 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002716 if (drv->bdrv_snapshot_create)
2717 return drv->bdrv_snapshot_create(bs, sn_info);
2718 if (bs->file)
2719 return bdrv_snapshot_create(bs->file, sn_info);
2720 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002721}
2722
ths5fafdf22007-09-16 21:08:06 +00002723int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002724 const char *snapshot_id)
2725{
2726 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002727 int ret, open_ret;
2728
bellardfaea38e2006-08-05 21:31:00 +00002729 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002730 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002731 if (drv->bdrv_snapshot_goto)
2732 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2733
2734 if (bs->file) {
2735 drv->bdrv_close(bs);
2736 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2737 open_ret = drv->bdrv_open(bs, bs->open_flags);
2738 if (open_ret < 0) {
2739 bdrv_delete(bs->file);
2740 bs->drv = NULL;
2741 return open_ret;
2742 }
2743 return ret;
2744 }
2745
2746 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002747}
2748
2749int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2750{
2751 BlockDriver *drv = bs->drv;
2752 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002753 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002754 if (drv->bdrv_snapshot_delete)
2755 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2756 if (bs->file)
2757 return bdrv_snapshot_delete(bs->file, snapshot_id);
2758 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002759}
2760
ths5fafdf22007-09-16 21:08:06 +00002761int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002762 QEMUSnapshotInfo **psn_info)
2763{
2764 BlockDriver *drv = bs->drv;
2765 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002766 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002767 if (drv->bdrv_snapshot_list)
2768 return drv->bdrv_snapshot_list(bs, psn_info);
2769 if (bs->file)
2770 return bdrv_snapshot_list(bs->file, psn_info);
2771 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002772}
2773
edison51ef6722010-09-21 19:58:41 -07002774int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2775 const char *snapshot_name)
2776{
2777 BlockDriver *drv = bs->drv;
2778 if (!drv) {
2779 return -ENOMEDIUM;
2780 }
2781 if (!bs->read_only) {
2782 return -EINVAL;
2783 }
2784 if (drv->bdrv_snapshot_load_tmp) {
2785 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2786 }
2787 return -ENOTSUP;
2788}
2789
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00002790BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2791 const char *backing_file)
2792{
2793 if (!bs->drv) {
2794 return NULL;
2795 }
2796
2797 if (bs->backing_hd) {
2798 if (strcmp(bs->backing_file, backing_file) == 0) {
2799 return bs->backing_hd;
2800 } else {
2801 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2802 }
2803 }
2804
2805 return NULL;
2806}
2807
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (capacity 'buf_size') in a short human readable
 * form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and get a K/M/G/T suffix: one decimal place is shown
 * while the scaled value is below 10, a rounded integer otherwise. Values
 * too large for the other ranges are expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < 10 * unit) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        /* The last suffix also absorbs everything that is too large */
        if (size < 1000 * unit || idx == NB_SUFFIXES - 1) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (unit >> 1)) / unit,
                     suffixes[idx]);
            return buf;
        }
    }

    return buf;
}
2837
2838char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2839{
2840 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002841#ifdef _WIN32
2842 struct tm *ptm;
2843#else
bellardfaea38e2006-08-05 21:31:00 +00002844 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002845#endif
bellardfaea38e2006-08-05 21:31:00 +00002846 time_t ti;
2847 int64_t secs;
2848
2849 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002850 snprintf(buf, buf_size,
2851 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002852 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2853 } else {
2854 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002855#ifdef _WIN32
2856 ptm = localtime(&ti);
2857 strftime(date_buf, sizeof(date_buf),
2858 "%Y-%m-%d %H:%M:%S", ptm);
2859#else
bellardfaea38e2006-08-05 21:31:00 +00002860 localtime_r(&ti, &tm);
2861 strftime(date_buf, sizeof(date_buf),
2862 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002863#endif
bellardfaea38e2006-08-05 21:31:00 +00002864 secs = sn->vm_clock_nsec / 1000000000;
2865 snprintf(clock_buf, sizeof(clock_buf),
2866 "%02d:%02d:%02d.%03d",
2867 (int)(secs / 3600),
2868 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002869 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002870 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2871 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002872 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002873 sn->id_str, sn->name,
2874 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2875 date_buf,
2876 clock_buf);
2877 }
2878 return buf;
2879}
2880
bellard83f64092006-08-01 16:21:11 +00002881/**************************************************************/
2882/* async I/Os */
2883
aliguori3b69e4b2009-01-22 16:59:24 +00002884BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002885 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002886 BlockDriverCompletionFunc *cb, void *opaque)
2887{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002888 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2889
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002890 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002891 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002892}
2893
aliguorif141eaf2009-04-07 18:43:24 +00002894BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2895 QEMUIOVector *qiov, int nb_sectors,
2896 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002897{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002898 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2899
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002900 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002901 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002902}
2903
Kevin Wolf40b4f532009-09-09 17:53:37 +02002904
2905typedef struct MultiwriteCB {
2906 int error;
2907 int num_requests;
2908 int num_callbacks;
2909 struct {
2910 BlockDriverCompletionFunc *cb;
2911 void *opaque;
2912 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002913 } callbacks[];
2914} MultiwriteCB;
2915
2916static void multiwrite_user_cb(MultiwriteCB *mcb)
2917{
2918 int i;
2919
2920 for (i = 0; i < mcb->num_callbacks; i++) {
2921 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002922 if (mcb->callbacks[i].free_qiov) {
2923 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2924 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002925 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002926 }
2927}
2928
2929static void multiwrite_cb(void *opaque, int ret)
2930{
2931 MultiwriteCB *mcb = opaque;
2932
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002933 trace_multiwrite_cb(mcb, ret);
2934
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002935 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002936 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002937 }
2938
2939 mcb->num_requests--;
2940 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002941 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002942 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002943 }
2944}
2945
2946static int multiwrite_req_compare(const void *a, const void *b)
2947{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002948 const BlockRequest *req1 = a, *req2 = b;
2949
2950 /*
2951 * Note that we can't simply subtract req2->sector from req1->sector
2952 * here as that could overflow the return value.
2953 */
2954 if (req1->sector > req2->sector) {
2955 return 1;
2956 } else if (req1->sector < req2->sector) {
2957 return -1;
2958 } else {
2959 return 0;
2960 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002961}
2962
2963/*
2964 * Takes a bunch of requests and tries to merge them. Returns the number of
2965 * requests that remain after merging.
2966 */
2967static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2968 int num_reqs, MultiwriteCB *mcb)
2969{
2970 int i, outidx;
2971
2972 // Sort requests by start sector
2973 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2974
2975 // Check if adjacent requests touch the same clusters. If so, combine them,
2976 // filling up gaps with zero sectors.
2977 outidx = 0;
2978 for (i = 1; i < num_reqs; i++) {
2979 int merge = 0;
2980 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2981
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01002982 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02002983 if (reqs[i].sector <= oldreq_last) {
2984 merge = 1;
2985 }
2986
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002987 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2988 merge = 0;
2989 }
2990
Kevin Wolf40b4f532009-09-09 17:53:37 +02002991 if (merge) {
2992 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002993 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002994 qemu_iovec_init(qiov,
2995 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2996
2997 // Add the first request to the merged one. If the requests are
2998 // overlapping, drop the last sectors of the first request.
2999 size = (reqs[i].sector - reqs[outidx].sector) << 9;
3000 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
3001
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003002 // We should need to add any zeros between the two requests
3003 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003004
3005 // Add the second request
3006 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
3007
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02003008 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003009 reqs[outidx].qiov = qiov;
3010
3011 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3012 } else {
3013 outidx++;
3014 reqs[outidx].sector = reqs[i].sector;
3015 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3016 reqs[outidx].qiov = reqs[i].qiov;
3017 }
3018 }
3019
3020 return outidx + 1;
3021}
3022
3023/*
3024 * Submit multiple AIO write requests at once.
3025 *
3026 * On success, the function returns 0 and all requests in the reqs array have
3027 * been submitted. In error case this function returns -1, and any of the
3028 * requests may or may not be submitted yet. In particular, this means that the
3029 * callback will be called for some of the requests, for others it won't. The
3030 * caller must check the error field of the BlockRequest to wait for the right
3031 * callbacks (if error != 0, no callback will be called).
3032 *
3033 * The implementation may modify the contents of the reqs array, e.g. to merge
3034 * requests. However, the fields opaque and error are left unmodified as they
3035 * are used to signal failure for a single request to the caller.
3036 */
3037int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3038{
Kevin Wolf40b4f532009-09-09 17:53:37 +02003039 MultiwriteCB *mcb;
3040 int i;
3041
Ryan Harper301db7c2011-03-07 10:01:04 -06003042 /* don't submit writes if we don't have a medium */
3043 if (bs->drv == NULL) {
3044 for (i = 0; i < num_reqs; i++) {
3045 reqs[i].error = -ENOMEDIUM;
3046 }
3047 return -1;
3048 }
3049
Kevin Wolf40b4f532009-09-09 17:53:37 +02003050 if (num_reqs == 0) {
3051 return 0;
3052 }
3053
3054 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05003055 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003056 mcb->num_requests = 0;
3057 mcb->num_callbacks = num_reqs;
3058
3059 for (i = 0; i < num_reqs; i++) {
3060 mcb->callbacks[i].cb = reqs[i].cb;
3061 mcb->callbacks[i].opaque = reqs[i].opaque;
3062 }
3063
3064 // Check for mergable requests
3065 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3066
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003067 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3068
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01003069 /* Run the aio requests. */
3070 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003071 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01003072 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02003073 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003074 }
3075
3076 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003077}
3078
bellard83f64092006-08-01 16:21:11 +00003079void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00003080{
aliguori6bbff9a2009-03-20 18:25:59 +00003081 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00003082}
3083
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08003084/* block I/O throttling */
3085static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3086 bool is_write, double elapsed_time, uint64_t *wait)
3087{
3088 uint64_t bps_limit = 0;
3089 double bytes_limit, bytes_base, bytes_res;
3090 double slice_time, wait_time;
3091
3092 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3093 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3094 } else if (bs->io_limits.bps[is_write]) {
3095 bps_limit = bs->io_limits.bps[is_write];
3096 } else {
3097 if (wait) {
3098 *wait = 0;
3099 }
3100
3101 return false;
3102 }
3103
3104 slice_time = bs->slice_end - bs->slice_start;
3105 slice_time /= (NANOSECONDS_PER_SECOND);
3106 bytes_limit = bps_limit * slice_time;
3107 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3108 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3109 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3110 }
3111
3112 /* bytes_base: the bytes of data which have been read/written; and
3113 * it is obtained from the history statistic info.
3114 * bytes_res: the remaining bytes of data which need to be read/written.
3115 * (bytes_base + bytes_res) / bps_limit: used to calcuate
3116 * the total time for completing reading/writting all data.
3117 */
3118 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3119
3120 if (bytes_base + bytes_res <= bytes_limit) {
3121 if (wait) {
3122 *wait = 0;
3123 }
3124
3125 return false;
3126 }
3127
3128 /* Calc approx time to dispatch */
3129 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3130
3131 /* When the I/O rate at runtime exceeds the limits,
3132 * bs->slice_end need to be extended in order that the current statistic
3133 * info can be kept until the timer fire, so it is increased and tuned
3134 * based on the result of experiment.
3135 */
3136 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3137 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3138 if (wait) {
3139 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3140 }
3141
3142 return true;
3143}
3144
3145static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3146 double elapsed_time, uint64_t *wait)
3147{
3148 uint64_t iops_limit = 0;
3149 double ios_limit, ios_base;
3150 double slice_time, wait_time;
3151
3152 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3153 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3154 } else if (bs->io_limits.iops[is_write]) {
3155 iops_limit = bs->io_limits.iops[is_write];
3156 } else {
3157 if (wait) {
3158 *wait = 0;
3159 }
3160
3161 return false;
3162 }
3163
3164 slice_time = bs->slice_end - bs->slice_start;
3165 slice_time /= (NANOSECONDS_PER_SECOND);
3166 ios_limit = iops_limit * slice_time;
3167 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3168 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3169 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3170 }
3171
3172 if (ios_base + 1 <= ios_limit) {
3173 if (wait) {
3174 *wait = 0;
3175 }
3176
3177 return false;
3178 }
3179
3180 /* Calc approx time to dispatch */
3181 wait_time = (ios_base + 1) / iops_limit;
3182 if (wait_time > elapsed_time) {
3183 wait_time = wait_time - elapsed_time;
3184 } else {
3185 wait_time = 0;
3186 }
3187
3188 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3189 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3190 if (wait) {
3191 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3192 }
3193
3194 return true;
3195}
3196
3197static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3198 bool is_write, int64_t *wait)
3199{
3200 int64_t now, max_wait;
3201 uint64_t bps_wait = 0, iops_wait = 0;
3202 double elapsed_time;
3203 int bps_ret, iops_ret;
3204
3205 now = qemu_get_clock_ns(vm_clock);
3206 if ((bs->slice_start < now)
3207 && (bs->slice_end > now)) {
3208 bs->slice_end = now + bs->slice_time;
3209 } else {
3210 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3211 bs->slice_start = now;
3212 bs->slice_end = now + bs->slice_time;
3213
3214 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3215 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3216
3217 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3218 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3219 }
3220
3221 elapsed_time = now - bs->slice_start;
3222 elapsed_time /= (NANOSECONDS_PER_SECOND);
3223
3224 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3225 is_write, elapsed_time, &bps_wait);
3226 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3227 elapsed_time, &iops_wait);
3228 if (bps_ret || iops_ret) {
3229 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3230 if (wait) {
3231 *wait = max_wait;
3232 }
3233
3234 now = qemu_get_clock_ns(vm_clock);
3235 if (bs->slice_end < now + max_wait) {
3236 bs->slice_end = now + max_wait;
3237 }
3238
3239 return true;
3240 }
3241
3242 if (wait) {
3243 *wait = 0;
3244 }
3245
3246 return false;
3247}
pbrookce1a14d2006-08-07 02:38:06 +00003248
/**************************************************************/
/* async block device emulation */
3251
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003252typedef struct BlockDriverAIOCBSync {
3253 BlockDriverAIOCB common;
3254 QEMUBH *bh;
3255 int ret;
3256 /* vector translation state */
3257 QEMUIOVector *qiov;
3258 uint8_t *bounce;
3259 int is_write;
3260} BlockDriverAIOCBSync;
3261
3262static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3263{
Kevin Wolfb666d232010-05-05 11:44:39 +02003264 BlockDriverAIOCBSync *acb =
3265 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003266 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003267 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003268 qemu_aio_release(acb);
3269}
3270
3271static AIOPool bdrv_em_aio_pool = {
3272 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3273 .cancel = bdrv_aio_cancel_em,
3274};
3275
bellard83f64092006-08-01 16:21:11 +00003276static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003277{
pbrookce1a14d2006-08-07 02:38:06 +00003278 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003279
aliguorif141eaf2009-04-07 18:43:24 +00003280 if (!acb->is_write)
3281 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003282 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003283 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003284 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003285 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003286 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003287}
bellardbeac80c2006-06-26 20:08:57 +00003288
aliguorif141eaf2009-04-07 18:43:24 +00003289static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3290 int64_t sector_num,
3291 QEMUIOVector *qiov,
3292 int nb_sectors,
3293 BlockDriverCompletionFunc *cb,
3294 void *opaque,
3295 int is_write)
3296
bellardea2384d2004-08-01 21:59:26 +00003297{
pbrookce1a14d2006-08-07 02:38:06 +00003298 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003299
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003300 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003301 acb->is_write = is_write;
3302 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003303 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003304 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003305
3306 if (is_write) {
3307 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003308 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003309 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003310 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003311 }
3312
pbrookce1a14d2006-08-07 02:38:06 +00003313 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003314
pbrookce1a14d2006-08-07 02:38:06 +00003315 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003316}
3317
aliguorif141eaf2009-04-07 18:43:24 +00003318static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3319 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003320 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003321{
aliguorif141eaf2009-04-07 18:43:24 +00003322 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003323}
3324
aliguorif141eaf2009-04-07 18:43:24 +00003325static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3326 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3327 BlockDriverCompletionFunc *cb, void *opaque)
3328{
3329 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3330}
3331
Kevin Wolf68485422011-06-30 10:05:46 +02003332
3333typedef struct BlockDriverAIOCBCoroutine {
3334 BlockDriverAIOCB common;
3335 BlockRequest req;
3336 bool is_write;
3337 QEMUBH* bh;
3338} BlockDriverAIOCBCoroutine;
3339
3340static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3341{
3342 qemu_aio_flush();
3343}
3344
3345static AIOPool bdrv_em_co_aio_pool = {
3346 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3347 .cancel = bdrv_aio_co_cancel_em,
3348};
3349
Paolo Bonzini35246a62011-10-14 10:41:29 +02003350static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003351{
3352 BlockDriverAIOCBCoroutine *acb = opaque;
3353
3354 acb->common.cb(acb->common.opaque, acb->req.error);
3355 qemu_bh_delete(acb->bh);
3356 qemu_aio_release(acb);
3357}
3358
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003359/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3360static void coroutine_fn bdrv_co_do_rw(void *opaque)
3361{
3362 BlockDriverAIOCBCoroutine *acb = opaque;
3363 BlockDriverState *bs = acb->common.bs;
3364
3365 if (!acb->is_write) {
3366 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003367 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003368 } else {
3369 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003370 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003371 }
3372
Paolo Bonzini35246a62011-10-14 10:41:29 +02003373 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003374 qemu_bh_schedule(acb->bh);
3375}
3376
Kevin Wolf68485422011-06-30 10:05:46 +02003377static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3378 int64_t sector_num,
3379 QEMUIOVector *qiov,
3380 int nb_sectors,
3381 BlockDriverCompletionFunc *cb,
3382 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003383 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003384{
3385 Coroutine *co;
3386 BlockDriverAIOCBCoroutine *acb;
3387
3388 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3389 acb->req.sector = sector_num;
3390 acb->req.nb_sectors = nb_sectors;
3391 acb->req.qiov = qiov;
3392 acb->is_write = is_write;
3393
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003394 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003395 qemu_coroutine_enter(co, acb);
3396
3397 return &acb->common;
3398}
3399
Paolo Bonzini07f07612011-10-17 12:32:12 +02003400static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003401{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003402 BlockDriverAIOCBCoroutine *acb = opaque;
3403 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003404
Paolo Bonzini07f07612011-10-17 12:32:12 +02003405 acb->req.error = bdrv_co_flush(bs);
3406 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003407 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003408}
3409
Paolo Bonzini07f07612011-10-17 12:32:12 +02003410BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003411 BlockDriverCompletionFunc *cb, void *opaque)
3412{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003413 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003414
Paolo Bonzini07f07612011-10-17 12:32:12 +02003415 Coroutine *co;
3416 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003417
Paolo Bonzini07f07612011-10-17 12:32:12 +02003418 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3419 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3420 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003421
Alexander Graf016f5cf2010-05-26 17:51:49 +02003422 return &acb->common;
3423}
3424
Paolo Bonzini4265d622011-10-17 12:32:14 +02003425static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3426{
3427 BlockDriverAIOCBCoroutine *acb = opaque;
3428 BlockDriverState *bs = acb->common.bs;
3429
3430 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3431 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3432 qemu_bh_schedule(acb->bh);
3433}
3434
3435BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3436 int64_t sector_num, int nb_sectors,
3437 BlockDriverCompletionFunc *cb, void *opaque)
3438{
3439 Coroutine *co;
3440 BlockDriverAIOCBCoroutine *acb;
3441
3442 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3443
3444 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3445 acb->req.sector = sector_num;
3446 acb->req.nb_sectors = nb_sectors;
3447 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3448 qemu_coroutine_enter(co, acb);
3449
3450 return &acb->common;
3451}
3452
bellardea2384d2004-08-01 21:59:26 +00003453void bdrv_init(void)
3454{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003455 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003456}
pbrookce1a14d2006-08-07 02:38:06 +00003457
Markus Armbrustereb852012009-10-27 18:41:44 +01003458void bdrv_init_with_whitelist(void)
3459{
3460 use_bdrv_whitelist = 1;
3461 bdrv_init();
3462}
3463
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003464void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3465 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003466{
pbrookce1a14d2006-08-07 02:38:06 +00003467 BlockDriverAIOCB *acb;
3468
aliguori6bbff9a2009-03-20 18:25:59 +00003469 if (pool->free_aiocb) {
3470 acb = pool->free_aiocb;
3471 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003472 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003473 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003474 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003475 }
3476 acb->bs = bs;
3477 acb->cb = cb;
3478 acb->opaque = opaque;
3479 return acb;
3480}
3481
3482void qemu_aio_release(void *p)
3483{
aliguori6bbff9a2009-03-20 18:25:59 +00003484 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3485 AIOPool *pool = acb->pool;
3486 acb->next = pool->free_aiocb;
3487 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003488}
bellard19cb3732006-08-19 11:45:59 +00003489
/**************************************************************/
/* Coroutine block device emulation */
3492
3493typedef struct CoroutineIOCompletion {
3494 Coroutine *coroutine;
3495 int ret;
3496} CoroutineIOCompletion;
3497
3498static void bdrv_co_io_em_complete(void *opaque, int ret)
3499{
3500 CoroutineIOCompletion *co = opaque;
3501
3502 co->ret = ret;
3503 qemu_coroutine_enter(co->coroutine, NULL);
3504}
3505
3506static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3507 int nb_sectors, QEMUIOVector *iov,
3508 bool is_write)
3509{
3510 CoroutineIOCompletion co = {
3511 .coroutine = qemu_coroutine_self(),
3512 };
3513 BlockDriverAIOCB *acb;
3514
3515 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003516 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3517 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003518 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003519 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3520 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003521 }
3522
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003523 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003524 if (!acb) {
3525 return -EIO;
3526 }
3527 qemu_coroutine_yield();
3528
3529 return co.ret;
3530}
3531
3532static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3533 int64_t sector_num, int nb_sectors,
3534 QEMUIOVector *iov)
3535{
3536 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3537}
3538
3539static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3540 int64_t sector_num, int nb_sectors,
3541 QEMUIOVector *iov)
3542{
3543 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3544}
3545
Paolo Bonzini07f07612011-10-17 12:32:12 +02003546static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003547{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003548 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003549
Paolo Bonzini07f07612011-10-17 12:32:12 +02003550 rwco->ret = bdrv_co_flush(rwco->bs);
3551}
3552
3553int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3554{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003555 int ret;
3556
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003557 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003558 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003559 }
3560
Kevin Wolfca716362011-11-10 18:13:59 +01003561 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003562 if (bs->drv->bdrv_co_flush_to_os) {
3563 ret = bs->drv->bdrv_co_flush_to_os(bs);
3564 if (ret < 0) {
3565 return ret;
3566 }
3567 }
3568
Kevin Wolfca716362011-11-10 18:13:59 +01003569 /* But don't actually force it to the disk with cache=unsafe */
3570 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3571 return 0;
3572 }
3573
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003574 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003575 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003576 } else if (bs->drv->bdrv_aio_flush) {
3577 BlockDriverAIOCB *acb;
3578 CoroutineIOCompletion co = {
3579 .coroutine = qemu_coroutine_self(),
3580 };
3581
3582 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3583 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003584 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003585 } else {
3586 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003587 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003588 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003589 } else {
3590 /*
3591 * Some block drivers always operate in either writethrough or unsafe
3592 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3593 * know how the server works (because the behaviour is hardcoded or
3594 * depends on server-side configuration), so we can't ensure that
3595 * everything is safe on disk. Returning an error doesn't work because
3596 * that would break guests even if the server operates in writethrough
3597 * mode.
3598 *
3599 * Let's hope the user knows what he's doing.
3600 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003601 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003602 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003603 if (ret < 0) {
3604 return ret;
3605 }
3606
3607 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3608 * in the case of cache=unsafe, so there are no useless flushes.
3609 */
3610 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003611}
3612
Anthony Liguori0f154232011-11-14 15:09:45 -06003613void bdrv_invalidate_cache(BlockDriverState *bs)
3614{
3615 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3616 bs->drv->bdrv_invalidate_cache(bs);
3617 }
3618}
3619
3620void bdrv_invalidate_cache_all(void)
3621{
3622 BlockDriverState *bs;
3623
3624 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3625 bdrv_invalidate_cache(bs);
3626 }
3627}
3628
Benoît Canet07789262012-03-23 08:36:49 +01003629void bdrv_clear_incoming_migration_all(void)
3630{
3631 BlockDriverState *bs;
3632
3633 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3634 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
3635 }
3636}
3637
Paolo Bonzini07f07612011-10-17 12:32:12 +02003638int bdrv_flush(BlockDriverState *bs)
3639{
3640 Coroutine *co;
3641 RwCo rwco = {
3642 .bs = bs,
3643 .ret = NOT_DONE,
3644 };
3645
3646 if (qemu_in_coroutine()) {
3647 /* Fast-path if already in coroutine context */
3648 bdrv_flush_co_entry(&rwco);
3649 } else {
3650 co = qemu_coroutine_create(bdrv_flush_co_entry);
3651 qemu_coroutine_enter(co, &rwco);
3652 while (rwco.ret == NOT_DONE) {
3653 qemu_aio_wait();
3654 }
3655 }
3656
3657 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003658}
3659
Paolo Bonzini4265d622011-10-17 12:32:14 +02003660static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3661{
3662 RwCo *rwco = opaque;
3663
3664 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3665}
3666
3667int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3668 int nb_sectors)
3669{
3670 if (!bs->drv) {
3671 return -ENOMEDIUM;
3672 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3673 return -EIO;
3674 } else if (bs->read_only) {
3675 return -EROFS;
3676 } else if (bs->drv->bdrv_co_discard) {
3677 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3678 } else if (bs->drv->bdrv_aio_discard) {
3679 BlockDriverAIOCB *acb;
3680 CoroutineIOCompletion co = {
3681 .coroutine = qemu_coroutine_self(),
3682 };
3683
3684 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3685 bdrv_co_io_em_complete, &co);
3686 if (acb == NULL) {
3687 return -EIO;
3688 } else {
3689 qemu_coroutine_yield();
3690 return co.ret;
3691 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003692 } else {
3693 return 0;
3694 }
3695}
3696
3697int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3698{
3699 Coroutine *co;
3700 RwCo rwco = {
3701 .bs = bs,
3702 .sector_num = sector_num,
3703 .nb_sectors = nb_sectors,
3704 .ret = NOT_DONE,
3705 };
3706
3707 if (qemu_in_coroutine()) {
3708 /* Fast-path if already in coroutine context */
3709 bdrv_discard_co_entry(&rwco);
3710 } else {
3711 co = qemu_coroutine_create(bdrv_discard_co_entry);
3712 qemu_coroutine_enter(co, &rwco);
3713 while (rwco.ret == NOT_DONE) {
3714 qemu_aio_wait();
3715 }
3716 }
3717
3718 return rwco.ret;
3719}
3720
/**************************************************************/
/* removable device support */
3723
3724/**
3725 * Return TRUE if the media is present
3726 */
3727int bdrv_is_inserted(BlockDriverState *bs)
3728{
3729 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003730
bellard19cb3732006-08-19 11:45:59 +00003731 if (!drv)
3732 return 0;
3733 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003734 return 1;
3735 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003736}
3737
3738/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003739 * Return whether the media changed since the last call to this
3740 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003741 */
3742int bdrv_media_changed(BlockDriverState *bs)
3743{
3744 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003745
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003746 if (drv && drv->bdrv_media_changed) {
3747 return drv->bdrv_media_changed(bs);
3748 }
3749 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003750}
3751
3752/**
3753 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3754 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02003755void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003756{
3757 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003758
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003759 if (drv && drv->bdrv_eject) {
3760 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003761 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02003762
3763 if (bs->device_name[0] != '\0') {
3764 bdrv_emit_qmp_eject_event(bs, eject_flag);
3765 }
bellard19cb3732006-08-19 11:45:59 +00003766}
3767
bellard19cb3732006-08-19 11:45:59 +00003768/**
3769 * Lock or unlock the media (if it is locked, the user won't be able
3770 * to eject it manually).
3771 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003772void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003773{
3774 BlockDriver *drv = bs->drv;
3775
Markus Armbruster025e8492011-09-06 18:58:47 +02003776 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003777
Markus Armbruster025e8492011-09-06 18:58:47 +02003778 if (drv && drv->bdrv_lock_medium) {
3779 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003780 }
3781}
ths985a03b2007-12-24 16:10:43 +00003782
/* needed for generic SCSI interface */
3784
3785int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3786{
3787 BlockDriver *drv = bs->drv;
3788
3789 if (drv && drv->bdrv_ioctl)
3790 return drv->bdrv_ioctl(bs, req, buf);
3791 return -ENOTSUP;
3792}
aliguori7d780662009-03-12 19:57:08 +00003793
aliguori221f7152009-03-28 17:28:41 +00003794BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3795 unsigned long int req, void *buf,
3796 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003797{
aliguori221f7152009-03-28 17:28:41 +00003798 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003799
aliguori221f7152009-03-28 17:28:41 +00003800 if (drv && drv->bdrv_aio_ioctl)
3801 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3802 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003803}
aliguorie268ca52009-04-22 20:20:00 +00003804
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003805void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3806{
3807 bs->buffer_alignment = align;
3808}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003809
aliguorie268ca52009-04-22 20:20:00 +00003810void *qemu_blockalign(BlockDriverState *bs, size_t size)
3811{
3812 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3813}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003814
3815void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3816{
3817 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003818
Liran Schouraaa0eb72010-01-26 10:31:48 +02003819 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003820 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003821 if (!bs->dirty_bitmap) {
3822 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3823 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3824 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003825
Anthony Liguori7267c092011-08-20 22:09:37 -05003826 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003827 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003828 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003829 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003830 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003831 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003832 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003833 }
3834}
3835
3836int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3837{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003838 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003839
Jan Kiszkac6d22832009-11-30 18:21:20 +01003840 if (bs->dirty_bitmap &&
3841 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003842 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3843 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003844 } else {
3845 return 0;
3846 }
3847}
3848
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003849void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3850 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003851{
3852 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3853}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003854
3855int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3856{
3857 return bs->dirty_count;
3858}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003859
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003860void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3861{
3862 assert(bs->in_use != in_use);
3863 bs->in_use = in_use;
3864}
3865
3866int bdrv_in_use(BlockDriverState *bs)
3867{
3868 return bs->in_use;
3869}
3870
Luiz Capitulino28a72822011-09-26 17:43:50 -03003871void bdrv_iostatus_enable(BlockDriverState *bs)
3872{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003873 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003874 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003875}
3876
3877/* The I/O status is only enabled if the drive explicitly
3878 * enables it _and_ the VM is configured to stop on errors */
3879bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3880{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003881 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003882 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3883 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3884 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3885}
3886
3887void bdrv_iostatus_disable(BlockDriverState *bs)
3888{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003889 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003890}
3891
3892void bdrv_iostatus_reset(BlockDriverState *bs)
3893{
3894 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003895 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003896 }
3897}
3898
3899/* XXX: Today this is set by device models because it makes the implementation
3900 quite simple. However, the block layer knows about the error, so it's
3901 possible to implement this without device models being involved */
3902void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3903{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003904 if (bdrv_iostatus_is_enabled(bs) &&
3905 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003906 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003907 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3908 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003909 }
3910}
3911
Christoph Hellwiga597e792011-08-25 08:26:01 +02003912void
3913bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3914 enum BlockAcctType type)
3915{
3916 assert(type < BDRV_MAX_IOTYPE);
3917
3918 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003919 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003920 cookie->type = type;
3921}
3922
3923void
3924bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3925{
3926 assert(cookie->type < BDRV_MAX_IOTYPE);
3927
3928 bs->nr_bytes[cookie->type] += cookie->bytes;
3929 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003930 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003931}
3932
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003933int bdrv_img_create(const char *filename, const char *fmt,
3934 const char *base_filename, const char *base_fmt,
3935 char *options, uint64_t img_size, int flags)
3936{
3937 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003938 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003939 BlockDriverState *bs = NULL;
3940 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003941 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003942 int ret = 0;
3943
3944 /* Find driver and parse its options */
3945 drv = bdrv_find_format(fmt);
3946 if (!drv) {
3947 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003948 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003949 goto out;
3950 }
3951
3952 proto_drv = bdrv_find_protocol(filename);
3953 if (!proto_drv) {
3954 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003955 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003956 goto out;
3957 }
3958
3959 create_options = append_option_parameters(create_options,
3960 drv->create_options);
3961 create_options = append_option_parameters(create_options,
3962 proto_drv->create_options);
3963
3964 /* Create parameter list with default values */
3965 param = parse_option_parameters("", create_options, param);
3966
3967 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3968
3969 /* Parse -o options */
3970 if (options) {
3971 param = parse_option_parameters(options, create_options, param);
3972 if (param == NULL) {
3973 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003974 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003975 goto out;
3976 }
3977 }
3978
3979 if (base_filename) {
3980 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3981 base_filename)) {
3982 error_report("Backing file not supported for file format '%s'",
3983 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003984 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003985 goto out;
3986 }
3987 }
3988
3989 if (base_fmt) {
3990 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3991 error_report("Backing file format not supported for file "
3992 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003993 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003994 goto out;
3995 }
3996 }
3997
Jes Sorensen792da932010-12-16 13:52:17 +01003998 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3999 if (backing_file && backing_file->value.s) {
4000 if (!strcmp(filename, backing_file->value.s)) {
4001 error_report("Error: Trying to create an image with the "
4002 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004003 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01004004 goto out;
4005 }
4006 }
4007
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004008 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4009 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004010 backing_drv = bdrv_find_format(backing_fmt->value.s);
4011 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004012 error_report("Unknown backing file format '%s'",
4013 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004014 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004015 goto out;
4016 }
4017 }
4018
4019 // The size for the image must always be specified, with one exception:
4020 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02004021 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4022 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004023 if (backing_file && backing_file->value.s) {
4024 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004025 char buf[32];
4026
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004027 bs = bdrv_new("");
4028
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004029 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004030 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004031 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004032 goto out;
4033 }
4034 bdrv_get_geometry(bs, &size);
4035 size *= 512;
4036
4037 snprintf(buf, sizeof(buf), "%" PRId64, size);
4038 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4039 } else {
4040 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004041 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004042 goto out;
4043 }
4044 }
4045
4046 printf("Formatting '%s', fmt=%s ", filename, fmt);
4047 print_option_parameters(param);
4048 puts("");
4049
4050 ret = bdrv_create(drv, filename, param);
4051
4052 if (ret < 0) {
4053 if (ret == -ENOTSUP) {
4054 error_report("Formatting or formatting option not supported for "
4055 "file format '%s'", fmt);
4056 } else if (ret == -EFBIG) {
4057 error_report("The image size is too large for file format '%s'",
4058 fmt);
4059 } else {
4060 error_report("%s: error while creating %s: %s", filename, fmt,
4061 strerror(-ret));
4062 }
4063 }
4064
4065out:
4066 free_option_parameters(create_options);
4067 free_option_parameters(param);
4068
4069 if (bs) {
4070 bdrv_delete(bs);
4071 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01004072
4073 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004074}
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004075
4076void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
4077 BlockDriverCompletionFunc *cb, void *opaque)
4078{
4079 BlockJob *job;
4080
4081 if (bs->job || bdrv_in_use(bs)) {
4082 return NULL;
4083 }
4084 bdrv_set_in_use(bs, 1);
4085
4086 job = g_malloc0(job_type->instance_size);
4087 job->job_type = job_type;
4088 job->bs = bs;
4089 job->cb = cb;
4090 job->opaque = opaque;
4091 bs->job = job;
4092 return job;
4093}
4094
4095void block_job_complete(BlockJob *job, int ret)
4096{
4097 BlockDriverState *bs = job->bs;
4098
4099 assert(bs->job == job);
4100 job->cb(job->opaque, ret);
4101 bs->job = NULL;
4102 g_free(job);
4103 bdrv_set_in_use(bs, 0);
4104}
4105
4106int block_job_set_speed(BlockJob *job, int64_t value)
4107{
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004108 int rc;
4109
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004110 if (!job->job_type->set_speed) {
4111 return -ENOTSUP;
4112 }
Paolo Bonzini9f25ecc2012-03-30 13:17:12 +02004113 rc = job->job_type->set_speed(job, value);
4114 if (rc == 0) {
4115 job->speed = value;
4116 }
4117 return rc;
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00004118}
4119
4120void block_job_cancel(BlockJob *job)
4121{
4122 job->cancelled = true;
4123}
4124
4125bool block_job_is_cancelled(BlockJob *job)
4126{
4127 return job->cancelled;
4128}
Paolo Bonzini3e914652012-03-30 13:17:11 +02004129
4130void block_job_cancel_sync(BlockJob *job)
4131{
4132 BlockDriverState *bs = job->bs;
4133
4134 assert(bs->job == job);
4135 block_job_cancel(job);
4136 while (bs->job != NULL && bs->job->busy) {
4137 qemu_aio_wait();
4138 }
4139}