blob: c9fa5c17fff47d4c4e0e30204e390b715de0ad13 [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
aliguori376253e2009-03-05 23:01:23 +000027#include "monitor.h"
bellardea2384d2004-08-01 21:59:26 +000028#include "block_int.h"
Anthony Liguori5efa9d52009-05-09 17:03:42 -050029#include "module.h"
Luiz Capitulinof795e742011-10-21 16:05:43 -020030#include "qjson.h"
Kevin Wolf68485422011-06-30 10:05:46 +020031#include "qemu-coroutine.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030032#include "qmp-commands.h"
Zhi Yong Wu0563e192011-11-03 16:57:25 +080033#include "qemu-timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000034
Juan Quintela71e72a12009-07-27 16:12:56 +020035#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000036#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000039#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000040#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/disk.h>
42#endif
blueswir1c5e97232009-03-07 20:06:23 +000043#endif
bellard7674e7b2005-04-26 21:59:26 +000044
aliguori49dc7682009-03-08 16:26:59 +000045#ifdef _WIN32
46#include <windows.h>
47#endif
48
/* Placeholder value used while an emulated sync operation is in progress */
#define NOT_DONE 0x7fffffff

/* Flag bits carried by a request (see the 'flags' argument of
 * bdrv_co_do_readv() / bdrv_co_do_writev()). */
typedef enum {
    BDRV_REQ_COPY_ON_READ = 1 << 0,
    BDRV_REQ_ZERO_WRITE   = 1 << 1,
} BdrvRequestFlags;
55
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020056static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000057static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000059 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000060static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000062 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020063static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +010069static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000070 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010072static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000073 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010075static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010081 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010082static void coroutine_fn bdrv_co_do_rw(void *opaque);
bellardec530c82006-04-25 22:36:06 +000083
Zhi Yong Wu98f90db2011-11-08 13:00:14 +080084static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
85 bool is_write, double elapsed_time, uint64_t *wait);
86static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
87 double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
89 bool is_write, int64_t *wait);
90
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010091static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000093
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010094static QLIST_HEAD(, BlockDriver) bdrv_drivers =
95 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000096
Markus Armbrusterf9092b12010-06-25 10:33:39 +020097/* The device to use for VM snapshots */
98static BlockDriverState *bs_snapshots;
99
Markus Armbrustereb852012009-10-27 18:41:44 +0100100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
#ifdef _WIN32
/* Return non-zero if filename starts with an ASCII letter followed by ':'
 * (for example "c:"). */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only inspected once filename[0] is known to be a letter */
    return is_letter && filename[1] == ':';
}

/* Return 1 if filename is exactly a drive specification ("d:") or starts
 * with one of the prefixes "\\.\" or "//./", otherwise 0. */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
122
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800123/* throttling disk I/O limits */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800124void bdrv_io_limits_disable(BlockDriverState *bs)
125{
126 bs->io_limits_enabled = false;
127
128 while (qemu_co_queue_next(&bs->throttled_reqs));
129
130 if (bs->block_timer) {
131 qemu_del_timer(bs->block_timer);
132 qemu_free_timer(bs->block_timer);
133 bs->block_timer = NULL;
134 }
135
136 bs->slice_start = 0;
137 bs->slice_end = 0;
138 bs->slice_time = 0;
139 memset(&bs->io_base, 0, sizeof(bs->io_base));
140}
141
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800142static void bdrv_block_timer(void *opaque)
143{
144 BlockDriverState *bs = opaque;
145
146 qemu_co_queue_next(&bs->throttled_reqs);
147}
148
149void bdrv_io_limits_enable(BlockDriverState *bs)
150{
151 qemu_co_queue_init(&bs->throttled_reqs);
152 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
153 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
154 bs->slice_start = qemu_get_clock_ns(vm_clock);
155 bs->slice_end = bs->slice_start + bs->slice_time;
156 memset(&bs->io_base, 0, sizeof(bs->io_base));
157 bs->io_limits_enabled = true;
158}
159
160bool bdrv_io_limits_enabled(BlockDriverState *bs)
161{
162 BlockIOLimit *io_limits = &bs->io_limits;
163 return io_limits->bps[BLOCK_IO_LIMIT_READ]
164 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
165 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
166 || io_limits->iops[BLOCK_IO_LIMIT_READ]
167 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
168 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
169}
170
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800171static void bdrv_io_limits_intercept(BlockDriverState *bs,
172 bool is_write, int nb_sectors)
173{
174 int64_t wait_time = -1;
175
176 if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
177 qemu_co_queue_wait(&bs->throttled_reqs);
178 }
179
180 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
181 * throttled requests will not be dequeued until the current request is
182 * allowed to be serviced. So if the current request still exceeds the
183 * limits, it will be inserted to the head. All requests followed it will
184 * be still in throttled_reqs queue.
185 */
186
187 while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
188 qemu_mod_timer(bs->block_timer,
189 wait_time + qemu_get_clock_ns(vm_clock));
190 qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
191 }
192
193 qemu_co_queue_next(&bs->throttled_reqs);
194}
195
/*
 * Check if the path starts with "<protocol>:".
 *
 * Returns 1 if path contains a ':' character, 0 otherwise. On Windows,
 * drive specifications and paths starting with a drive letter are never
 * treated as protocols.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    if (strchr(path, ':') == NULL) {
        return 0;
    }
    return 1;
}
208
/*
 * Return non-zero if path is absolute.
 *
 * Everything up to and including the first ':' (if any) is skipped; the
 * path is absolute when the character that follows is '/' (or, on
 * Windows, '\').
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
228
/*
 * Build in dest (at most dest_size bytes) the path designated by filename.
 *
 * If filename is absolute it is copied as is. Otherwise it is considered
 * relative to base_path: the part of base_path up to and including its
 * last directory separator - or its first ':' when that comes later - is
 * kept and filename is appended to it. URLs are supported.
 *
 * Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *cut;      /* first character of base_path that is dropped */
    const char *last_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just after the first ':' of base_path */
    cut = strchr(base_path, ':');
    cut = cut ? cut + 1 : base_path;

    /* position just after the last directory separator */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_backslash = strrchr(base_path, '\\');

        if (!last_sep || last_backslash > last_sep) {
            last_sep = last_backslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;

    /* keep whichever of the two reaches further */
    if (last_sep > cut) {
        cut = last_sep;
    }

    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
272
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500273void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000274{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100275 /* Block drivers without coroutine functions need emulation */
276 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200277 bdrv->bdrv_co_readv = bdrv_co_readv_em;
278 bdrv->bdrv_co_writev = bdrv_co_writev_em;
279
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100280 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
281 * the block driver lacks aio we need to emulate that too.
282 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200283 if (!bdrv->bdrv_aio_readv) {
284 /* add AIO emulation layer */
285 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
286 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200287 }
bellard83f64092006-08-01 16:21:11 +0000288 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200289
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100290 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000291}
bellardb3380822004-03-14 21:38:54 +0000292
293/* create a new block device (by default it is empty) */
294BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000295{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100296 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000297
Anthony Liguori7267c092011-08-20 22:09:37 -0500298 bs = g_malloc0(sizeof(BlockDriverState));
bellardb3380822004-03-14 21:38:54 +0000299 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000300 if (device_name[0] != '\0') {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100301 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
bellardea2384d2004-08-01 21:59:26 +0000302 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300303 bdrv_iostatus_disable(bs);
bellardb3380822004-03-14 21:38:54 +0000304 return bs;
305}
306
bellardea2384d2004-08-01 21:59:26 +0000307BlockDriver *bdrv_find_format(const char *format_name)
308{
309 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100310 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
311 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000312 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100313 }
bellardea2384d2004-08-01 21:59:26 +0000314 }
315 return NULL;
316}
317
Markus Armbrustereb852012009-10-27 18:41:44 +0100318static int bdrv_is_whitelisted(BlockDriver *drv)
319{
320 static const char *whitelist[] = {
321 CONFIG_BDRV_WHITELIST
322 };
323 const char **p;
324
325 if (!whitelist[0])
326 return 1; /* no whitelist, anything goes */
327
328 for (p = whitelist; *p; p++) {
329 if (!strcmp(drv->format_name, *p)) {
330 return 1;
331 }
332 }
333 return 0;
334}
335
336BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
337{
338 BlockDriver *drv = bdrv_find_format(format_name);
339 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
340}
341
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200342int bdrv_create(BlockDriver *drv, const char* filename,
343 QEMUOptionParameter *options)
bellardea2384d2004-08-01 21:59:26 +0000344{
345 if (!drv->bdrv_create)
346 return -ENOTSUP;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200347
348 return drv->bdrv_create(filename, options);
bellardea2384d2004-08-01 21:59:26 +0000349}
350
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200351int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
352{
353 BlockDriver *drv;
354
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900355 drv = bdrv_find_protocol(filename);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200356 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000357 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200358 }
359
360 return bdrv_create(drv, filename, options);
361}
362
#ifdef _WIN32
/*
 * Store the name of a new temporary file in filename.
 *
 * NOTE(review): 'size' is not used by this variant; the Win32
 * GetTempFileName() API expects its output buffer to hold at least
 * MAX_PATH characters, so callers must pass a buffer that large.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Store the name of a new temporary file in filename (at most size bytes).
 *
 * The file is created by mkstemp() under $TMPDIR, or under /tmp when
 * TMPDIR is not set, and closed again right away. There is no return
 * value: if the file could not be created, filename is left holding
 * whatever snprintf()/mkstemp() put there.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; there is no descriptor to close then */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
bellardea2384d2004-08-01 21:59:26 +0000385
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200386/*
387 * Detect host devices. By convention, /dev/cdrom[N] is always
388 * recognized as a host CDROM.
389 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200390static BlockDriver *find_hdev_driver(const char *filename)
391{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200392 int score_max = 0, score;
393 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200394
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100395 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200396 if (d->bdrv_probe_device) {
397 score = d->bdrv_probe_device(filename);
398 if (score > score_max) {
399 score_max = score;
400 drv = d;
401 }
402 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200403 }
404
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200405 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200406}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200407
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900408BlockDriver *bdrv_find_protocol(const char *filename)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200409{
410 BlockDriver *drv1;
411 char protocol[128];
412 int len;
413 const char *p;
414
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200415 /* TODO Drivers without bdrv_file_open must be specified explicitly */
416
Christoph Hellwig39508e72010-06-23 12:25:17 +0200417 /*
418 * XXX(hch): we really should not let host device detection
419 * override an explicit protocol specification, but moving this
420 * later breaks access to device names with colons in them.
421 * Thanks to the brain-dead persistent naming schemes on udev-
422 * based Linux systems those actually are quite common.
423 */
424 drv1 = find_hdev_driver(filename);
425 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200426 return drv1;
427 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200428
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000429 if (!path_has_protocol(filename)) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200430 return bdrv_find_format("file");
431 }
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000432 p = strchr(filename, ':');
433 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200434 len = p - filename;
435 if (len > sizeof(protocol) - 1)
436 len = sizeof(protocol) - 1;
437 memcpy(protocol, filename, len);
438 protocol[len] = '\0';
439 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
440 if (drv1->protocol_name &&
441 !strcmp(drv1->protocol_name, protocol)) {
442 return drv1;
443 }
444 }
445 return NULL;
446}
447
Stefan Weilc98ac352010-07-21 21:51:51 +0200448static int find_image_format(const char *filename, BlockDriver **pdrv)
bellardea2384d2004-08-01 21:59:26 +0000449{
bellard83f64092006-08-01 16:21:11 +0000450 int ret, score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000451 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000452 uint8_t buf[2048];
453 BlockDriverState *bs;
ths3b46e622007-09-17 08:09:54 +0000454
Naphtali Spreif5edb012010-01-17 16:48:13 +0200455 ret = bdrv_file_open(&bs, filename, 0);
Stefan Weilc98ac352010-07-21 21:51:51 +0200456 if (ret < 0) {
457 *pdrv = NULL;
458 return ret;
459 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700460
Kevin Wolf08a00552010-06-01 18:37:31 +0200461 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
462 if (bs->sg || !bdrv_is_inserted(bs)) {
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700463 bdrv_delete(bs);
Stefan Weilc98ac352010-07-21 21:51:51 +0200464 drv = bdrv_find_format("raw");
465 if (!drv) {
466 ret = -ENOENT;
467 }
468 *pdrv = drv;
469 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700470 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700471
bellard83f64092006-08-01 16:21:11 +0000472 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
473 bdrv_delete(bs);
474 if (ret < 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200475 *pdrv = NULL;
476 return ret;
bellard83f64092006-08-01 16:21:11 +0000477 }
478
bellardea2384d2004-08-01 21:59:26 +0000479 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200480 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100481 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000482 if (drv1->bdrv_probe) {
483 score = drv1->bdrv_probe(buf, ret, filename);
484 if (score > score_max) {
485 score_max = score;
486 drv = drv1;
487 }
bellardea2384d2004-08-01 21:59:26 +0000488 }
489 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200490 if (!drv) {
491 ret = -ENOENT;
492 }
493 *pdrv = drv;
494 return ret;
bellardea2384d2004-08-01 21:59:26 +0000495}
496
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100497/**
498 * Set the current 'total_sectors' value
499 */
500static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
501{
502 BlockDriver *drv = bs->drv;
503
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700504 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
505 if (bs->sg)
506 return 0;
507
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100508 /* query actual device if possible, otherwise just trust the hint */
509 if (drv->bdrv_getlength) {
510 int64_t length = drv->bdrv_getlength(bs);
511 if (length < 0) {
512 return length;
513 }
514 hint = length >> BDRV_SECTOR_BITS;
515 }
516
517 bs->total_sectors = hint;
518 return 0;
519}
520
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100521/**
522 * Set open flags for a given cache mode
523 *
524 * Return 0 on success, -1 if the cache mode was invalid.
525 */
526int bdrv_parse_cache_flags(const char *mode, int *flags)
527{
528 *flags &= ~BDRV_O_CACHE_MASK;
529
530 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
531 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100532 } else if (!strcmp(mode, "directsync")) {
533 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100534 } else if (!strcmp(mode, "writeback")) {
535 *flags |= BDRV_O_CACHE_WB;
536 } else if (!strcmp(mode, "unsafe")) {
537 *flags |= BDRV_O_CACHE_WB;
538 *flags |= BDRV_O_NO_FLUSH;
539 } else if (!strcmp(mode, "writethrough")) {
540 /* this is the default */
541 } else {
542 return -1;
543 }
544
545 return 0;
546}
547
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000548/**
549 * The copy-on-read flag is actually a reference count so multiple users may
550 * use the feature without worrying about clobbering its previous state.
551 * Copy-on-read stays enabled until all users have called to disable it.
552 */
553void bdrv_enable_copy_on_read(BlockDriverState *bs)
554{
555 bs->copy_on_read++;
556}
557
558void bdrv_disable_copy_on_read(BlockDriverState *bs)
559{
560 assert(bs->copy_on_read > 0);
561 bs->copy_on_read--;
562}
563
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200564/*
Kevin Wolf57915332010-04-14 15:24:50 +0200565 * Common part for opening disk images and files
566 */
567static int bdrv_open_common(BlockDriverState *bs, const char *filename,
568 int flags, BlockDriver *drv)
569{
570 int ret, open_flags;
571
572 assert(drv != NULL);
573
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100574 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
575
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200576 bs->file = NULL;
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100577 bs->total_sectors = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200578 bs->encrypted = 0;
579 bs->valid_key = 0;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100580 bs->sg = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200581 bs->open_flags = flags;
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100582 bs->growable = 0;
Kevin Wolf57915332010-04-14 15:24:50 +0200583 bs->buffer_alignment = 512;
584
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000585 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
586 if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
587 bdrv_enable_copy_on_read(bs);
588 }
589
Kevin Wolf57915332010-04-14 15:24:50 +0200590 pstrcpy(bs->filename, sizeof(bs->filename), filename);
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100591 bs->backing_file[0] = '\0';
Kevin Wolf57915332010-04-14 15:24:50 +0200592
593 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
594 return -ENOTSUP;
595 }
596
597 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500598 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200599
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100600 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Kevin Wolf57915332010-04-14 15:24:50 +0200601
602 /*
603 * Clear flags that are internal to the block layer before opening the
604 * image.
605 */
606 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
607
608 /*
Stefan Weilebabb672011-04-26 10:29:36 +0200609 * Snapshots should be writable.
Kevin Wolf57915332010-04-14 15:24:50 +0200610 */
611 if (bs->is_temporary) {
612 open_flags |= BDRV_O_RDWR;
613 }
614
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100615 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
616
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200617 /* Open the image, either directly or using a protocol */
618 if (drv->bdrv_file_open) {
619 ret = drv->bdrv_file_open(bs, filename, open_flags);
620 } else {
621 ret = bdrv_file_open(&bs->file, filename, open_flags);
622 if (ret >= 0) {
623 ret = drv->bdrv_open(bs, open_flags);
624 }
625 }
626
Kevin Wolf57915332010-04-14 15:24:50 +0200627 if (ret < 0) {
628 goto free_and_fail;
629 }
630
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100631 ret = refresh_total_sectors(bs, bs->total_sectors);
632 if (ret < 0) {
633 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200634 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100635
Kevin Wolf57915332010-04-14 15:24:50 +0200636#ifndef _WIN32
637 if (bs->is_temporary) {
638 unlink(filename);
639 }
640#endif
641 return 0;
642
643free_and_fail:
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200644 if (bs->file) {
645 bdrv_delete(bs->file);
646 bs->file = NULL;
647 }
Anthony Liguori7267c092011-08-20 22:09:37 -0500648 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200649 bs->opaque = NULL;
650 bs->drv = NULL;
651 return ret;
652}
653
654/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200655 * Opens a file using a protocol (file, host_device, nbd, ...)
656 */
bellard83f64092006-08-01 16:21:11 +0000657int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
bellardb3380822004-03-14 21:38:54 +0000658{
bellard83f64092006-08-01 16:21:11 +0000659 BlockDriverState *bs;
Christoph Hellwig6db95602010-04-05 16:53:57 +0200660 BlockDriver *drv;
bellard83f64092006-08-01 16:21:11 +0000661 int ret;
662
MORITA Kazutakab50cbab2010-05-26 11:35:36 +0900663 drv = bdrv_find_protocol(filename);
Christoph Hellwig6db95602010-04-05 16:53:57 +0200664 if (!drv) {
665 return -ENOENT;
666 }
667
bellard83f64092006-08-01 16:21:11 +0000668 bs = bdrv_new("");
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200669 ret = bdrv_open_common(bs, filename, flags, drv);
bellard83f64092006-08-01 16:21:11 +0000670 if (ret < 0) {
671 bdrv_delete(bs);
672 return ret;
bellard3b0d4f62005-10-30 18:30:10 +0000673 }
aliguori71d07702009-03-03 17:37:16 +0000674 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +0000675 *pbs = bs;
676 return 0;
bellardea2384d2004-08-01 21:59:26 +0000677}
bellardfc01f7e2003-06-30 10:03:06 +0000678
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200679/*
680 * Opens a disk image (raw, qcow2, vmdk, ...)
681 */
Kevin Wolfd6e90982010-03-31 14:40:27 +0200682int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
683 BlockDriver *drv)
bellardea2384d2004-08-01 21:59:26 +0000684{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200685 int ret;
Kevin Wolf2b572812011-10-26 11:03:01 +0200686 char tmp_filename[PATH_MAX];
bellard712e7872005-04-28 21:09:32 +0000687
bellard83f64092006-08-01 16:21:11 +0000688 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +0000689 BlockDriverState *bs1;
690 int64_t total_size;
aliguori7c96d462008-09-12 17:54:13 +0000691 int is_protocol = 0;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200692 BlockDriver *bdrv_qcow2;
693 QEMUOptionParameter *options;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200694 char backing_filename[PATH_MAX];
ths3b46e622007-09-17 08:09:54 +0000695
bellardea2384d2004-08-01 21:59:26 +0000696 /* if snapshot, we create a temporary backing file and open it
697 instead of opening 'filename' directly */
698
699 /* if there is a backing file, use it */
700 bs1 = bdrv_new("");
Kevin Wolfd6e90982010-03-31 14:40:27 +0200701 ret = bdrv_open(bs1, filename, 0, drv);
aliguori51d7c002009-03-05 23:00:29 +0000702 if (ret < 0) {
bellardea2384d2004-08-01 21:59:26 +0000703 bdrv_delete(bs1);
aliguori51d7c002009-03-05 23:00:29 +0000704 return ret;
bellardea2384d2004-08-01 21:59:26 +0000705 }
Jes Sorensen3e829902010-05-27 16:20:30 +0200706 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +0000707
708 if (bs1->drv && bs1->drv->protocol_name)
709 is_protocol = 1;
710
bellardea2384d2004-08-01 21:59:26 +0000711 bdrv_delete(bs1);
ths3b46e622007-09-17 08:09:54 +0000712
bellardea2384d2004-08-01 21:59:26 +0000713 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
aliguori7c96d462008-09-12 17:54:13 +0000714
715 /* Real path is meaningless for protocols */
716 if (is_protocol)
717 snprintf(backing_filename, sizeof(backing_filename),
718 "%s", filename);
Kirill A. Shutemov114cdfa2009-12-25 18:19:22 +0000719 else if (!realpath(filename, backing_filename))
720 return -errno;
aliguori7c96d462008-09-12 17:54:13 +0000721
Kevin Wolf91a073a2009-05-27 14:48:06 +0200722 bdrv_qcow2 = bdrv_find_format("qcow2");
723 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
724
Jes Sorensen3e829902010-05-27 16:20:30 +0200725 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +0200726 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
727 if (drv) {
728 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
729 drv->format_name);
730 }
731
732 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
Jan Kiszkad7487682010-04-29 18:24:50 +0200733 free_option_parameters(options);
aliguori51d7c002009-03-05 23:00:29 +0000734 if (ret < 0) {
735 return ret;
bellardea2384d2004-08-01 21:59:26 +0000736 }
Kevin Wolf91a073a2009-05-27 14:48:06 +0200737
bellardea2384d2004-08-01 21:59:26 +0000738 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +0200739 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +0000740 bs->is_temporary = 1;
741 }
bellard712e7872005-04-28 21:09:32 +0000742
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200743 /* Find the right image format driver */
Christoph Hellwig6db95602010-04-05 16:53:57 +0200744 if (!drv) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200745 ret = find_image_format(filename, &drv);
aliguori51d7c002009-03-05 23:00:29 +0000746 }
Christoph Hellwig69873072010-01-20 18:13:25 +0100747
aliguori51d7c002009-03-05 23:00:29 +0000748 if (!drv) {
aliguori51d7c002009-03-05 23:00:29 +0000749 goto unlink_and_fail;
bellardea2384d2004-08-01 21:59:26 +0000750 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200751
752 /* Open the image */
753 ret = bdrv_open_common(bs, filename, flags, drv);
754 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +0100755 goto unlink_and_fail;
756 }
757
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200758 /* If there is a backing file, use it */
759 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
760 char backing_filename[PATH_MAX];
761 int back_flags;
762 BlockDriver *back_drv = NULL;
763
764 bs->backing_hd = bdrv_new("");
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000765
766 if (path_has_protocol(bs->backing_file)) {
767 pstrcpy(backing_filename, sizeof(backing_filename),
768 bs->backing_file);
769 } else {
770 path_combine(backing_filename, sizeof(backing_filename),
771 filename, bs->backing_file);
772 }
773
774 if (bs->backing_format[0] != '\0') {
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200775 back_drv = bdrv_find_format(bs->backing_format);
Stefan Hajnoczidf2dbb42010-12-02 16:54:13 +0000776 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200777
778 /* backing files always opened read-only */
779 back_flags =
780 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
781
782 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
783 if (ret < 0) {
784 bdrv_close(bs);
785 return ret;
786 }
787 if (bs->is_temporary) {
788 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
789 } else {
790 /* base image inherits from "parent" */
791 bs->backing_hd->keep_read_only = bs->keep_read_only;
792 }
793 }
794
795 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200796 bdrv_dev_change_media_cb(bs, true);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200797 }
798
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800799 /* throttling disk I/O limits */
800 if (bs->io_limits_enabled) {
801 bdrv_io_limits_enable(bs);
802 }
803
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200804 return 0;
805
806unlink_and_fail:
807 if (bs->is_temporary) {
808 unlink(filename);
809 }
810 return ret;
811}
812
bellardfc01f7e2003-06-30 10:03:06 +0000813void bdrv_close(BlockDriverState *bs)
814{
bellard19cb3732006-08-19 11:45:59 +0000815 if (bs->drv) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200816 if (bs == bs_snapshots) {
817 bs_snapshots = NULL;
818 }
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100819 if (bs->backing_hd) {
bellardea2384d2004-08-01 21:59:26 +0000820 bdrv_delete(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +0100821 bs->backing_hd = NULL;
822 }
bellardea2384d2004-08-01 21:59:26 +0000823 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -0500824 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +0000825#ifdef _WIN32
826 if (bs->is_temporary) {
827 unlink(bs->filename);
828 }
bellard67b915a2004-03-31 23:37:16 +0000829#endif
bellardea2384d2004-08-01 21:59:26 +0000830 bs->opaque = NULL;
831 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000832 bs->copy_on_read = 0;
bellardb3380822004-03-14 21:38:54 +0000833
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200834 if (bs->file != NULL) {
835 bdrv_close(bs->file);
836 }
837
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200838 bdrv_dev_change_media_cb(bs, false);
bellardb3380822004-03-14 21:38:54 +0000839 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800840
841 /*throttling disk I/O limits*/
842 if (bs->io_limits_enabled) {
843 bdrv_io_limits_disable(bs);
844 }
bellardb3380822004-03-14 21:38:54 +0000845}
846
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +0900847void bdrv_close_all(void)
848{
849 BlockDriverState *bs;
850
851 QTAILQ_FOREACH(bs, &bdrv_states, list) {
852 bdrv_close(bs);
853 }
854}
855
Stefan Hajnoczi922453b2011-11-30 12:23:43 +0000856/*
857 * Wait for pending requests to complete across all BlockDriverStates
858 *
859 * This function does not flush data to disk, use bdrv_flush_all() for that
860 * after calling this function.
861 */
862void bdrv_drain_all(void)
863{
864 BlockDriverState *bs;
865
866 qemu_aio_flush();
867
868 /* If requests are still pending there is a bug somewhere */
869 QTAILQ_FOREACH(bs, &bdrv_states, list) {
870 assert(QLIST_EMPTY(&bs->tracked_requests));
871 assert(qemu_co_queue_empty(&bs->throttled_reqs));
872 }
873}
874
Ryan Harperd22b2f42011-03-29 20:51:47 -0500875/* make a BlockDriverState anonymous by removing from bdrv_state list.
876 Also, NULL terminate the device_name to prevent double remove */
877void bdrv_make_anon(BlockDriverState *bs)
878{
879 if (bs->device_name[0] != '\0') {
880 QTAILQ_REMOVE(&bdrv_states, bs, list);
881 }
882 bs->device_name[0] = '\0';
883}
884
bellardb3380822004-03-14 21:38:54 +0000885void bdrv_delete(BlockDriverState *bs)
886{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200887 assert(!bs->dev);
Markus Armbruster18846de2010-06-29 16:58:30 +0200888
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100889 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -0500890 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +0000891
bellardb3380822004-03-14 21:38:54 +0000892 bdrv_close(bs);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200893 if (bs->file != NULL) {
894 bdrv_delete(bs->file);
895 }
896
Markus Armbrusterf9092b12010-06-25 10:33:39 +0200897 assert(bs != bs_snapshots);
Anthony Liguori7267c092011-08-20 22:09:37 -0500898 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +0000899}
900
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200901int bdrv_attach_dev(BlockDriverState *bs, void *dev)
902/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +0200903{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200904 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +0200905 return -EBUSY;
906 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200907 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -0300908 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +0200909 return 0;
910}
911
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200912/* TODO qdevified devices don't use this, remove when devices are qdevified */
913void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +0200914{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200915 if (bdrv_attach_dev(bs, dev) < 0) {
916 abort();
917 }
918}
919
920void bdrv_detach_dev(BlockDriverState *bs, void *dev)
921/* TODO change to DeviceState *dev when all users are qdevified */
922{
923 assert(bs->dev == dev);
924 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +0200925 bs->dev_ops = NULL;
926 bs->dev_opaque = NULL;
Markus Armbruster29e05f22011-09-06 18:58:57 +0200927 bs->buffer_alignment = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +0200928}
929
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200930/* TODO change to return DeviceState * when all users are qdevified */
931void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +0200932{
Markus Armbrusterfa879d62011-08-03 15:07:40 +0200933 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +0200934}
935
Markus Armbruster0e49de52011-08-03 15:07:41 +0200936void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
937 void *opaque)
938{
939 bs->dev_ops = ops;
940 bs->dev_opaque = opaque;
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200941 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
942 bs_snapshots = NULL;
943 }
Markus Armbruster0e49de52011-08-03 15:07:41 +0200944}
945
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200946static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +0200947{
Markus Armbruster145feb12011-08-03 15:07:42 +0200948 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +0200949 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Markus Armbruster145feb12011-08-03 15:07:42 +0200950 }
951}
952
Markus Armbruster2c6942f2011-09-06 18:58:51 +0200953bool bdrv_dev_has_removable_media(BlockDriverState *bs)
954{
955 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
956}
957
Paolo Bonzini025ccaa2011-11-07 17:50:13 +0100958void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
959{
960 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
961 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
962 }
963}
964
Markus Armbrustere4def802011-09-06 18:58:53 +0200965bool bdrv_dev_is_tray_open(BlockDriverState *bs)
966{
967 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
968 return bs->dev_ops->is_tray_open(bs->dev_opaque);
969 }
970 return false;
971}
972
Markus Armbruster145feb12011-08-03 15:07:42 +0200973static void bdrv_dev_resize_cb(BlockDriverState *bs)
974{
975 if (bs->dev_ops && bs->dev_ops->resize_cb) {
976 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +0200977 }
978}
979
Markus Armbrusterf1076392011-09-06 18:58:46 +0200980bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
981{
982 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
983 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
984 }
985 return false;
986}
987
aliguorie97fc192009-04-21 23:11:50 +0000988/*
989 * Run consistency checks on an image
990 *
Kevin Wolfe076f332010-06-29 11:43:13 +0200991 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +0200992 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +0200993 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +0000994 */
Kevin Wolfe076f332010-06-29 11:43:13 +0200995int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
aliguorie97fc192009-04-21 23:11:50 +0000996{
997 if (bs->drv->bdrv_check == NULL) {
998 return -ENOTSUP;
999 }
1000
Kevin Wolfe076f332010-06-29 11:43:13 +02001001 memset(res, 0, sizeof(*res));
Kevin Wolf9ac228e2010-06-29 12:37:54 +02001002 return bs->drv->bdrv_check(bs, res);
aliguorie97fc192009-04-21 23:11:50 +00001003}
1004
Kevin Wolf8a426612010-07-16 17:17:01 +02001005#define COMMIT_BUF_SECTORS 2048
1006
bellard33e39632003-07-06 17:15:21 +00001007/* commit COW file into the raw image */
1008int bdrv_commit(BlockDriverState *bs)
1009{
bellard19cb3732006-08-19 11:45:59 +00001010 BlockDriver *drv = bs->drv;
Kevin Wolfee181192010-08-05 13:05:22 +02001011 BlockDriver *backing_drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001012 int64_t sector, total_sectors;
1013 int n, ro, open_flags;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001014 int ret = 0, rw_ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001015 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001016 char filename[1024];
1017 BlockDriverState *bs_rw, *bs_ro;
bellard33e39632003-07-06 17:15:21 +00001018
bellard19cb3732006-08-19 11:45:59 +00001019 if (!drv)
1020 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001021
1022 if (!bs->backing_hd) {
1023 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001024 }
1025
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001026 if (bs->backing_hd->keep_read_only) {
1027 return -EACCES;
1028 }
Kevin Wolfee181192010-08-05 13:05:22 +02001029
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00001030 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1031 return -EBUSY;
1032 }
1033
Kevin Wolfee181192010-08-05 13:05:22 +02001034 backing_drv = bs->backing_hd->drv;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001035 ro = bs->backing_hd->read_only;
1036 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1037 open_flags = bs->backing_hd->open_flags;
1038
1039 if (ro) {
1040 /* re-open as RW */
1041 bdrv_delete(bs->backing_hd);
1042 bs->backing_hd = NULL;
1043 bs_rw = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001044 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1045 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001046 if (rw_ret < 0) {
1047 bdrv_delete(bs_rw);
1048 /* try to re-open read-only */
1049 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001050 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1051 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001052 if (ret < 0) {
1053 bdrv_delete(bs_ro);
1054 /* drive not functional anymore */
1055 bs->drv = NULL;
1056 return ret;
1057 }
1058 bs->backing_hd = bs_ro;
1059 return rw_ret;
1060 }
1061 bs->backing_hd = bs_rw;
bellard33e39632003-07-06 17:15:21 +00001062 }
bellardea2384d2004-08-01 21:59:26 +00001063
Jan Kiszka6ea44302009-11-30 18:21:19 +01001064 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001065 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001066
Kevin Wolf8a426612010-07-16 17:17:01 +02001067 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001068 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001069
1070 if (bdrv_read(bs, sector, buf, n) != 0) {
1071 ret = -EIO;
1072 goto ro_cleanup;
1073 }
1074
1075 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1076 ret = -EIO;
1077 goto ro_cleanup;
1078 }
bellardea2384d2004-08-01 21:59:26 +00001079 }
1080 }
bellard95389c82005-12-18 18:28:15 +00001081
Christoph Hellwig1d449522010-01-17 12:32:30 +01001082 if (drv->bdrv_make_empty) {
1083 ret = drv->bdrv_make_empty(bs);
1084 bdrv_flush(bs);
1085 }
bellard95389c82005-12-18 18:28:15 +00001086
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001087 /*
1088 * Make sure all data we wrote to the backing device is actually
1089 * stable on disk.
1090 */
1091 if (bs->backing_hd)
1092 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001093
1094ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001095 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001096
1097 if (ro) {
1098 /* re-open as RO */
1099 bdrv_delete(bs->backing_hd);
1100 bs->backing_hd = NULL;
1101 bs_ro = bdrv_new("");
Kevin Wolfee181192010-08-05 13:05:22 +02001102 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1103 backing_drv);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001104 if (ret < 0) {
1105 bdrv_delete(bs_ro);
1106 /* drive not functional anymore */
1107 bs->drv = NULL;
1108 return ret;
1109 }
1110 bs->backing_hd = bs_ro;
1111 bs->backing_hd->keep_read_only = 0;
1112 }
1113
Christoph Hellwig1d449522010-01-17 12:32:30 +01001114 return ret;
bellard33e39632003-07-06 17:15:21 +00001115}
1116
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001117void bdrv_commit_all(void)
1118{
1119 BlockDriverState *bs;
1120
1121 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1122 bdrv_commit(bs);
1123 }
1124}
1125
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001126struct BdrvTrackedRequest {
1127 BlockDriverState *bs;
1128 int64_t sector_num;
1129 int nb_sectors;
1130 bool is_write;
1131 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001132 Coroutine *co; /* owner, used for deadlock detection */
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001133 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001134};
1135
1136/**
1137 * Remove an active request from the tracked requests list
1138 *
1139 * This function should be called when a tracked request is completing.
1140 */
1141static void tracked_request_end(BdrvTrackedRequest *req)
1142{
1143 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001144 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001145}
1146
1147/**
1148 * Add an active request to the tracked requests list
1149 */
1150static void tracked_request_begin(BdrvTrackedRequest *req,
1151 BlockDriverState *bs,
1152 int64_t sector_num,
1153 int nb_sectors, bool is_write)
1154{
1155 *req = (BdrvTrackedRequest){
1156 .bs = bs,
1157 .sector_num = sector_num,
1158 .nb_sectors = nb_sectors,
1159 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001160 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001161 };
1162
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001163 qemu_co_queue_init(&req->wait_queue);
1164
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001165 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1166}
1167
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001168/**
1169 * Round a region to cluster boundaries
1170 */
1171static void round_to_clusters(BlockDriverState *bs,
1172 int64_t sector_num, int nb_sectors,
1173 int64_t *cluster_sector_num,
1174 int *cluster_nb_sectors)
1175{
1176 BlockDriverInfo bdi;
1177
1178 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1179 *cluster_sector_num = sector_num;
1180 *cluster_nb_sectors = nb_sectors;
1181 } else {
1182 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1183 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1184 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1185 nb_sectors, c);
1186 }
1187}
1188
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001189static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1190 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001191 /* aaaa bbbb */
1192 if (sector_num >= req->sector_num + req->nb_sectors) {
1193 return false;
1194 }
1195 /* bbbb aaaa */
1196 if (req->sector_num >= sector_num + nb_sectors) {
1197 return false;
1198 }
1199 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001200}
1201
1202static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1203 int64_t sector_num, int nb_sectors)
1204{
1205 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001206 int64_t cluster_sector_num;
1207 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001208 bool retry;
1209
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001210 /* If we touch the same cluster it counts as an overlap. This guarantees
1211 * that allocating writes will be serialized and not race with each other
1212 * for the same cluster. For example, in copy-on-read it ensures that the
1213 * CoR read and write operations are atomic and guest writes cannot
1214 * interleave between them.
1215 */
1216 round_to_clusters(bs, sector_num, nb_sectors,
1217 &cluster_sector_num, &cluster_nb_sectors);
1218
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001219 do {
1220 retry = false;
1221 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001222 if (tracked_request_overlaps(req, cluster_sector_num,
1223 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001224 /* Hitting this means there was a reentrant request, for
1225 * example, a block driver issuing nested requests. This must
1226 * never happen since it means deadlock.
1227 */
1228 assert(qemu_coroutine_self() != req->co);
1229
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001230 qemu_co_queue_wait(&req->wait_queue);
1231 retry = true;
1232 break;
1233 }
1234 }
1235 } while (retry);
1236}
1237
Kevin Wolf756e6732010-01-12 12:55:17 +01001238/*
1239 * Return values:
1240 * 0 - success
1241 * -EINVAL - backing format specified, but no file
1242 * -ENOSPC - can't update the backing file because no space is left in the
1243 * image file header
1244 * -ENOTSUP - format driver doesn't support changing the backing file
1245 */
1246int bdrv_change_backing_file(BlockDriverState *bs,
1247 const char *backing_file, const char *backing_fmt)
1248{
1249 BlockDriver *drv = bs->drv;
1250
1251 if (drv->bdrv_change_backing_file != NULL) {
1252 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1253 } else {
1254 return -ENOTSUP;
1255 }
1256}
1257
aliguori71d07702009-03-03 17:37:16 +00001258static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1259 size_t size)
1260{
1261 int64_t len;
1262
1263 if (!bdrv_is_inserted(bs))
1264 return -ENOMEDIUM;
1265
1266 if (bs->growable)
1267 return 0;
1268
1269 len = bdrv_getlength(bs);
1270
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001271 if (offset < 0)
1272 return -EIO;
1273
1274 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001275 return -EIO;
1276
1277 return 0;
1278}
1279
1280static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1281 int nb_sectors)
1282{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001283 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1284 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001285}
1286
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001287typedef struct RwCo {
1288 BlockDriverState *bs;
1289 int64_t sector_num;
1290 int nb_sectors;
1291 QEMUIOVector *qiov;
1292 bool is_write;
1293 int ret;
1294} RwCo;
1295
1296static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1297{
1298 RwCo *rwco = opaque;
1299
1300 if (!rwco->is_write) {
1301 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001302 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001303 } else {
1304 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001305 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001306 }
1307}
1308
1309/*
1310 * Process a synchronous request using coroutines
1311 */
1312static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1313 int nb_sectors, bool is_write)
1314{
1315 QEMUIOVector qiov;
1316 struct iovec iov = {
1317 .iov_base = (void *)buf,
1318 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1319 };
1320 Coroutine *co;
1321 RwCo rwco = {
1322 .bs = bs,
1323 .sector_num = sector_num,
1324 .nb_sectors = nb_sectors,
1325 .qiov = &qiov,
1326 .is_write = is_write,
1327 .ret = NOT_DONE,
1328 };
1329
1330 qemu_iovec_init_external(&qiov, &iov, 1);
1331
1332 if (qemu_in_coroutine()) {
1333 /* Fast-path if already in coroutine context */
1334 bdrv_rw_co_entry(&rwco);
1335 } else {
1336 co = qemu_coroutine_create(bdrv_rw_co_entry);
1337 qemu_coroutine_enter(co, &rwco);
1338 while (rwco.ret == NOT_DONE) {
1339 qemu_aio_wait();
1340 }
1341 }
1342 return rwco.ret;
1343}
1344
bellard19cb3732006-08-19 11:45:59 +00001345/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001346int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001347 uint8_t *buf, int nb_sectors)
1348{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001349 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001350}
1351
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001352static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001353 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001354{
1355 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001356 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001357
Jan Kiszka6ea44302009-11-30 18:21:19 +01001358 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001359 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001360
1361 for (; start <= end; start++) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01001362 idx = start / (sizeof(unsigned long) * 8);
1363 bit = start % (sizeof(unsigned long) * 8);
1364 val = bs->dirty_bitmap[idx];
1365 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001366 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001367 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001368 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02001369 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001370 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001371 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02001372 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02001373 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02001374 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01001375 }
1376 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001377 }
1378}
1379
ths5fafdf22007-09-16 21:08:06 +00001380/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00001381 -EIO generic I/O error (may happen for all errors)
1382 -ENOMEDIUM No media inserted.
1383 -EINVAL Invalid sector number or nb_sectors
1384 -EACCES Trying to write a read-only device
1385*/
ths5fafdf22007-09-16 21:08:06 +00001386int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001387 const uint8_t *buf, int nb_sectors)
1388{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001389 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00001390}
1391
aliguorieda578e2009-03-12 19:57:16 +00001392int bdrv_pread(BlockDriverState *bs, int64_t offset,
1393 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001394{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001395 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001396 int len, nb_sectors, count;
1397 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001398 int ret;
bellard83f64092006-08-01 16:21:11 +00001399
1400 count = count1;
1401 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001402 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001403 if (len > count)
1404 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001405 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001406 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001407 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1408 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001409 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00001410 count -= len;
1411 if (count == 0)
1412 return count1;
1413 sector_num++;
1414 buf += len;
1415 }
1416
1417 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001418 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001419 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001420 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1421 return ret;
bellard83f64092006-08-01 16:21:11 +00001422 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001423 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001424 buf += len;
1425 count -= len;
1426 }
1427
1428 /* add data from the last sector */
1429 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001430 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1431 return ret;
bellard83f64092006-08-01 16:21:11 +00001432 memcpy(buf, tmp_buf, count);
1433 }
1434 return count1;
1435}
1436
aliguorieda578e2009-03-12 19:57:16 +00001437int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1438 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00001439{
Jan Kiszka6ea44302009-11-30 18:21:19 +01001440 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00001441 int len, nb_sectors, count;
1442 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001443 int ret;
bellard83f64092006-08-01 16:21:11 +00001444
1445 count = count1;
1446 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001447 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00001448 if (len > count)
1449 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001450 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001451 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001452 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1453 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001454 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001455 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1456 return ret;
bellard83f64092006-08-01 16:21:11 +00001457 count -= len;
1458 if (count == 0)
1459 return count1;
1460 sector_num++;
1461 buf += len;
1462 }
1463
1464 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01001465 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001466 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001467 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1468 return ret;
bellard83f64092006-08-01 16:21:11 +00001469 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01001470 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00001471 buf += len;
1472 count -= len;
1473 }
1474
1475 /* add data from the last sector */
1476 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001477 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1478 return ret;
bellard83f64092006-08-01 16:21:11 +00001479 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01001480 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1481 return ret;
bellard83f64092006-08-01 16:21:11 +00001482 }
1483 return count1;
1484}
bellard83f64092006-08-01 16:21:11 +00001485
Kevin Wolff08145f2010-06-16 16:38:15 +02001486/*
1487 * Writes to the file and ensures that no writes are reordered across this
1488 * request (acts as a barrier)
1489 *
1490 * Returns 0 on success, -errno in error cases.
1491 */
1492int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1493 const void *buf, int count)
1494{
1495 int ret;
1496
1497 ret = bdrv_pwrite(bs, offset, buf, count);
1498 if (ret < 0) {
1499 return ret;
1500 }
1501
Stefan Hajnoczi92196b22011-08-04 12:26:52 +01001502 /* No flush needed for cache modes that use O_DSYNC */
1503 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
Kevin Wolff08145f2010-06-16 16:38:15 +02001504 bdrv_flush(bs);
1505 }
1506
1507 return 0;
1508}
1509
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001510static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00001511 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1512{
1513 /* Perform I/O through a temporary buffer so that users who scribble over
1514 * their read buffer while the operation is in progress do not end up
1515 * modifying the image file. This is critical for zero-copy guest I/O
1516 * where anything might happen inside guest memory.
1517 */
1518 void *bounce_buffer;
1519
1520 struct iovec iov;
1521 QEMUIOVector bounce_qiov;
1522 int64_t cluster_sector_num;
1523 int cluster_nb_sectors;
1524 size_t skip_bytes;
1525 int ret;
1526
1527 /* Cover entire cluster so no additional backing file I/O is required when
1528 * allocating cluster in the image file.
1529 */
1530 round_to_clusters(bs, sector_num, nb_sectors,
1531 &cluster_sector_num, &cluster_nb_sectors);
1532
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001533 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1534 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001535
1536 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1537 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1538 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1539
1540 ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1541 &bounce_qiov);
1542 if (ret < 0) {
1543 goto err;
1544 }
1545
1546 ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
1547 &bounce_qiov);
1548 if (ret < 0) {
1549 /* It might be okay to ignore write errors for guest requests. If this
1550 * is a deliberate copy-on-read then we don't want to ignore the error.
1551 * Simply report it in all cases.
1552 */
1553 goto err;
1554 }
1555
1556 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1557 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1558 nb_sectors * BDRV_SECTOR_SIZE);
1559
1560err:
1561 qemu_vfree(bounce_buffer);
1562 return ret;
1563}
1564
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001565/*
1566 * Handle a read request in coroutine context
1567 */
1568static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001569 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1570 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02001571{
1572 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001573 BdrvTrackedRequest req;
1574 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001575
Kevin Wolfda1fa912011-07-14 17:27:13 +02001576 if (!drv) {
1577 return -ENOMEDIUM;
1578 }
1579 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1580 return -EIO;
1581 }
1582
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001583 /* throttling disk read I/O */
1584 if (bs->io_limits_enabled) {
1585 bdrv_io_limits_intercept(bs, false, nb_sectors);
1586 }
1587
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001588 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001589 flags |= BDRV_REQ_COPY_ON_READ;
1590 }
1591 if (flags & BDRV_REQ_COPY_ON_READ) {
1592 bs->copy_on_read_in_flight++;
1593 }
1594
1595 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001596 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1597 }
1598
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001599 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001600
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001601 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00001602 int pnum;
1603
1604 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1605 if (ret < 0) {
1606 goto out;
1607 }
1608
1609 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001610 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001611 goto out;
1612 }
1613 }
1614
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001615 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00001616
1617out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001618 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001619
1620 if (flags & BDRV_REQ_COPY_ON_READ) {
1621 bs->copy_on_read_in_flight--;
1622 }
1623
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001624 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001625}
1626
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001627int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02001628 int nb_sectors, QEMUIOVector *qiov)
1629{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001630 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02001631
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001632 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1633}
1634
1635int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
1636 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1637{
1638 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
1639
1640 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
1641 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001642}
1643
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001644static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
1645 int64_t sector_num, int nb_sectors)
1646{
1647 BlockDriver *drv = bs->drv;
1648 QEMUIOVector qiov;
1649 struct iovec iov;
1650 int ret;
1651
1652 /* First try the efficient write zeroes operation */
1653 if (drv->bdrv_co_write_zeroes) {
1654 return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1655 }
1656
1657 /* Fall back to bounce buffer if write zeroes is unsupported */
1658 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
1659 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
1660 memset(iov.iov_base, 0, iov.iov_len);
1661 qemu_iovec_init_external(&qiov, &iov, 1);
1662
1663 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1664
1665 qemu_vfree(iov.iov_base);
1666 return ret;
1667}
1668
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001669/*
1670 * Handle a write request in coroutine context
1671 */
1672static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001673 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1674 BdrvRequestFlags flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001675{
1676 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001677 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001678 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001679
1680 if (!bs->drv) {
1681 return -ENOMEDIUM;
1682 }
1683 if (bs->read_only) {
1684 return -EACCES;
1685 }
1686 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1687 return -EIO;
1688 }
1689
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001690 /* throttling disk write I/O */
1691 if (bs->io_limits_enabled) {
1692 bdrv_io_limits_intercept(bs, true, nb_sectors);
1693 }
1694
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001695 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001696 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1697 }
1698
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001699 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1700
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001701 if (flags & BDRV_REQ_ZERO_WRITE) {
1702 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1703 } else {
1704 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1705 }
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001706
Kevin Wolfda1fa912011-07-14 17:27:13 +02001707 if (bs->dirty_bitmap) {
1708 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1709 }
1710
1711 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1712 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1713 }
1714
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001715 tracked_request_end(&req);
1716
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01001717 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02001718}
1719
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001720int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1721 int nb_sectors, QEMUIOVector *qiov)
1722{
1723 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1724
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001725 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
1726}
1727
1728int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
1729 int64_t sector_num, int nb_sectors)
1730{
1731 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
1732
1733 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
1734 BDRV_REQ_ZERO_WRITE);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01001735}
1736
bellard83f64092006-08-01 16:21:11 +00001737/**
bellard83f64092006-08-01 16:21:11 +00001738 * Truncate file to 'offset' bytes (needed only for file protocols)
1739 */
1740int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1741{
1742 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001743 int ret;
bellard83f64092006-08-01 16:21:11 +00001744 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001745 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00001746 if (!drv->bdrv_truncate)
1747 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02001748 if (bs->read_only)
1749 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02001750 if (bdrv_in_use(bs))
1751 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001752 ret = drv->bdrv_truncate(bs, offset);
1753 if (ret == 0) {
1754 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02001755 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001756 }
1757 return ret;
bellard83f64092006-08-01 16:21:11 +00001758}
1759
1760/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08001761 * Length of a allocated file in bytes. Sparse files are counted by actual
1762 * allocated space. Return < 0 if error or unknown.
1763 */
1764int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1765{
1766 BlockDriver *drv = bs->drv;
1767 if (!drv) {
1768 return -ENOMEDIUM;
1769 }
1770 if (drv->bdrv_get_allocated_file_size) {
1771 return drv->bdrv_get_allocated_file_size(bs);
1772 }
1773 if (bs->file) {
1774 return bdrv_get_allocated_file_size(bs->file);
1775 }
1776 return -ENOTSUP;
1777}
1778
1779/**
bellard83f64092006-08-01 16:21:11 +00001780 * Length of a file in bytes. Return < 0 if error or unknown.
1781 */
1782int64_t bdrv_getlength(BlockDriverState *bs)
1783{
1784 BlockDriver *drv = bs->drv;
1785 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00001786 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001787
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001788 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001789 if (drv->bdrv_getlength) {
1790 return drv->bdrv_getlength(bs);
1791 }
bellard83f64092006-08-01 16:21:11 +00001792 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01001793 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00001794}
1795
bellard19cb3732006-08-19 11:45:59 +00001796/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00001797void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00001798{
bellard19cb3732006-08-19 11:45:59 +00001799 int64_t length;
1800 length = bdrv_getlength(bs);
1801 if (length < 0)
1802 length = 0;
1803 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01001804 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00001805 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00001806}
bellardcf989512004-02-16 21:56:36 +00001807
/*
 * One entry of the MSDOS partition table, laid out as stored on disk
 * (the structure is packed).
 */
struct partition {
    /* 0x80 - active */
    uint8_t boot_ind;
    /* starting head */
    uint8_t head;
    /* starting sector */
    uint8_t sector;
    /* starting cylinder */
    uint8_t cyl;
    /* What partition type */
    uint8_t sys_ind;
    /* end head */
    uint8_t end_head;
    /* end sector */
    uint8_t end_sector;
    /* end cylinder */
    uint8_t end_cyl;
    /* starting sector counting from 0 */
    uint32_t start_sect;
    /* nr of sectors in partition */
    uint32_t nr_sects;
} QEMU_PACKED;
aliguorif3d54fc2008-11-25 21:50:24 +00001820
1821/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1822static int guess_disk_lchs(BlockDriverState *bs,
1823 int *pcylinders, int *pheads, int *psectors)
1824{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001825 uint8_t buf[BDRV_SECTOR_SIZE];
aliguorif3d54fc2008-11-25 21:50:24 +00001826 int ret, i, heads, sectors, cylinders;
1827 struct partition *p;
1828 uint32_t nr_sects;
blueswir1a38131b2008-12-05 17:56:40 +00001829 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001830
1831 bdrv_get_geometry(bs, &nb_sectors);
1832
1833 ret = bdrv_read(bs, 0, buf, 1);
1834 if (ret < 0)
1835 return -1;
1836 /* test msdos magic */
1837 if (buf[510] != 0x55 || buf[511] != 0xaa)
1838 return -1;
1839 for(i = 0; i < 4; i++) {
1840 p = ((struct partition *)(buf + 0x1be)) + i;
1841 nr_sects = le32_to_cpu(p->nr_sects);
1842 if (nr_sects && p->end_head) {
1843 /* We make the assumption that the partition terminates on
1844 a cylinder boundary */
1845 heads = p->end_head + 1;
1846 sectors = p->end_sector & 63;
1847 if (sectors == 0)
1848 continue;
1849 cylinders = nb_sectors / (heads * sectors);
1850 if (cylinders < 1 || cylinders > 16383)
1851 continue;
1852 *pheads = heads;
1853 *psectors = sectors;
1854 *pcylinders = cylinders;
1855#if 0
1856 printf("guessed geometry: LCHS=%d %d %d\n",
1857 cylinders, heads, sectors);
1858#endif
1859 return 0;
1860 }
1861 }
1862 return -1;
1863}
1864
1865void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1866{
1867 int translation, lba_detected = 0;
1868 int cylinders, heads, secs;
blueswir1a38131b2008-12-05 17:56:40 +00001869 uint64_t nb_sectors;
aliguorif3d54fc2008-11-25 21:50:24 +00001870
1871 /* if a geometry hint is available, use it */
1872 bdrv_get_geometry(bs, &nb_sectors);
1873 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1874 translation = bdrv_get_translation_hint(bs);
1875 if (cylinders != 0) {
1876 *pcyls = cylinders;
1877 *pheads = heads;
1878 *psecs = secs;
1879 } else {
1880 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1881 if (heads > 16) {
1882 /* if heads > 16, it means that a BIOS LBA
1883 translation was active, so the default
1884 hardware geometry is OK */
1885 lba_detected = 1;
1886 goto default_geometry;
1887 } else {
1888 *pcyls = cylinders;
1889 *pheads = heads;
1890 *psecs = secs;
1891 /* disable any translation to be in sync with
1892 the logical geometry */
1893 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1894 bdrv_set_translation_hint(bs,
1895 BIOS_ATA_TRANSLATION_NONE);
1896 }
1897 }
1898 } else {
1899 default_geometry:
1900 /* if no geometry, use a standard physical disk geometry */
1901 cylinders = nb_sectors / (16 * 63);
1902
1903 if (cylinders > 16383)
1904 cylinders = 16383;
1905 else if (cylinders < 2)
1906 cylinders = 2;
1907 *pcyls = cylinders;
1908 *pheads = 16;
1909 *psecs = 63;
1910 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1911 if ((*pcyls * *pheads) <= 131072) {
1912 bdrv_set_translation_hint(bs,
1913 BIOS_ATA_TRANSLATION_LARGE);
1914 } else {
1915 bdrv_set_translation_hint(bs,
1916 BIOS_ATA_TRANSLATION_LBA);
1917 }
1918 }
1919 }
1920 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1921 }
1922}
1923
ths5fafdf22007-09-16 21:08:06 +00001924void bdrv_set_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001925 int cyls, int heads, int secs)
1926{
1927 bs->cyls = cyls;
1928 bs->heads = heads;
1929 bs->secs = secs;
1930}
1931
bellard46d47672004-11-16 01:45:27 +00001932void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1933{
1934 bs->translation = translation;
1935}
1936
ths5fafdf22007-09-16 21:08:06 +00001937void bdrv_get_geometry_hint(BlockDriverState *bs,
bellardb3380822004-03-14 21:38:54 +00001938 int *pcyls, int *pheads, int *psecs)
1939{
1940 *pcyls = bs->cyls;
1941 *pheads = bs->heads;
1942 *psecs = bs->secs;
1943}
1944
Zhi Yong Wu0563e192011-11-03 16:57:25 +08001945/* throttling disk io limits */
1946void bdrv_set_io_limits(BlockDriverState *bs,
1947 BlockIOLimit *io_limits)
1948{
1949 bs->io_limits = *io_limits;
1950 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
1951}
1952
Blue Swirl5bbdbb42011-02-12 20:43:32 +00001953/* Recognize floppy formats */
1954typedef struct FDFormat {
1955 FDriveType drive;
1956 uint8_t last_sect;
1957 uint8_t max_track;
1958 uint8_t max_head;
1959} FDFormat;
1960
1961static const FDFormat fd_formats[] = {
1962 /* First entry is default format */
1963 /* 1.44 MB 3"1/2 floppy disks */
1964 { FDRIVE_DRV_144, 18, 80, 1, },
1965 { FDRIVE_DRV_144, 20, 80, 1, },
1966 { FDRIVE_DRV_144, 21, 80, 1, },
1967 { FDRIVE_DRV_144, 21, 82, 1, },
1968 { FDRIVE_DRV_144, 21, 83, 1, },
1969 { FDRIVE_DRV_144, 22, 80, 1, },
1970 { FDRIVE_DRV_144, 23, 80, 1, },
1971 { FDRIVE_DRV_144, 24, 80, 1, },
1972 /* 2.88 MB 3"1/2 floppy disks */
1973 { FDRIVE_DRV_288, 36, 80, 1, },
1974 { FDRIVE_DRV_288, 39, 80, 1, },
1975 { FDRIVE_DRV_288, 40, 80, 1, },
1976 { FDRIVE_DRV_288, 44, 80, 1, },
1977 { FDRIVE_DRV_288, 48, 80, 1, },
1978 /* 720 kB 3"1/2 floppy disks */
1979 { FDRIVE_DRV_144, 9, 80, 1, },
1980 { FDRIVE_DRV_144, 10, 80, 1, },
1981 { FDRIVE_DRV_144, 10, 82, 1, },
1982 { FDRIVE_DRV_144, 10, 83, 1, },
1983 { FDRIVE_DRV_144, 13, 80, 1, },
1984 { FDRIVE_DRV_144, 14, 80, 1, },
1985 /* 1.2 MB 5"1/4 floppy disks */
1986 { FDRIVE_DRV_120, 15, 80, 1, },
1987 { FDRIVE_DRV_120, 18, 80, 1, },
1988 { FDRIVE_DRV_120, 18, 82, 1, },
1989 { FDRIVE_DRV_120, 18, 83, 1, },
1990 { FDRIVE_DRV_120, 20, 80, 1, },
1991 /* 720 kB 5"1/4 floppy disks */
1992 { FDRIVE_DRV_120, 9, 80, 1, },
1993 { FDRIVE_DRV_120, 11, 80, 1, },
1994 /* 360 kB 5"1/4 floppy disks */
1995 { FDRIVE_DRV_120, 9, 40, 1, },
1996 { FDRIVE_DRV_120, 9, 40, 0, },
1997 { FDRIVE_DRV_120, 10, 41, 1, },
1998 { FDRIVE_DRV_120, 10, 42, 1, },
1999 /* 320 kB 5"1/4 floppy disks */
2000 { FDRIVE_DRV_120, 8, 40, 1, },
2001 { FDRIVE_DRV_120, 8, 40, 0, },
2002 /* 360 kB must match 5"1/4 better than 3"1/2... */
2003 { FDRIVE_DRV_144, 9, 80, 0, },
2004 /* end */
2005 { FDRIVE_DRV_NONE, -1, -1, 0, },
2006};
2007
2008void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2009 int *max_track, int *last_sect,
2010 FDriveType drive_in, FDriveType *drive)
2011{
2012 const FDFormat *parse;
2013 uint64_t nb_sectors, size;
2014 int i, first_match, match;
2015
2016 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2017 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2018 /* User defined disk */
2019 } else {
2020 bdrv_get_geometry(bs, &nb_sectors);
2021 match = -1;
2022 first_match = -1;
2023 for (i = 0; ; i++) {
2024 parse = &fd_formats[i];
2025 if (parse->drive == FDRIVE_DRV_NONE) {
2026 break;
2027 }
2028 if (drive_in == parse->drive ||
2029 drive_in == FDRIVE_DRV_NONE) {
2030 size = (parse->max_head + 1) * parse->max_track *
2031 parse->last_sect;
2032 if (nb_sectors == size) {
2033 match = i;
2034 break;
2035 }
2036 if (first_match == -1) {
2037 first_match = i;
2038 }
2039 }
2040 }
2041 if (match == -1) {
2042 if (first_match == -1) {
2043 match = 1;
2044 } else {
2045 match = first_match;
2046 }
2047 parse = &fd_formats[match];
2048 }
2049 *nb_heads = parse->max_head + 1;
2050 *max_track = parse->max_track;
2051 *last_sect = parse->last_sect;
2052 *drive = parse->drive;
2053 }
2054}
2055
bellard46d47672004-11-16 01:45:27 +00002056int bdrv_get_translation_hint(BlockDriverState *bs)
2057{
2058 return bs->translation;
2059}
2060
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002061void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2062 BlockErrorAction on_write_error)
2063{
2064 bs->on_read_error = on_read_error;
2065 bs->on_write_error = on_write_error;
2066}
2067
2068BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2069{
2070 return is_read ? bs->on_read_error : bs->on_write_error;
2071}
2072
bellardb3380822004-03-14 21:38:54 +00002073int bdrv_is_read_only(BlockDriverState *bs)
2074{
2075 return bs->read_only;
2076}
2077
ths985a03b2007-12-24 16:10:43 +00002078int bdrv_is_sg(BlockDriverState *bs)
2079{
2080 return bs->sg;
2081}
2082
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002083int bdrv_enable_write_cache(BlockDriverState *bs)
2084{
2085 return bs->enable_write_cache;
2086}
2087
bellardea2384d2004-08-01 21:59:26 +00002088int bdrv_is_encrypted(BlockDriverState *bs)
2089{
2090 if (bs->backing_hd && bs->backing_hd->encrypted)
2091 return 1;
2092 return bs->encrypted;
2093}
2094
aliguoric0f4ce72009-03-05 23:01:01 +00002095int bdrv_key_required(BlockDriverState *bs)
2096{
2097 BlockDriverState *backing_hd = bs->backing_hd;
2098
2099 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2100 return 1;
2101 return (bs->encrypted && !bs->valid_key);
2102}
2103
bellardea2384d2004-08-01 21:59:26 +00002104int bdrv_set_key(BlockDriverState *bs, const char *key)
2105{
2106 int ret;
2107 if (bs->backing_hd && bs->backing_hd->encrypted) {
2108 ret = bdrv_set_key(bs->backing_hd, key);
2109 if (ret < 0)
2110 return ret;
2111 if (!bs->encrypted)
2112 return 0;
2113 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002114 if (!bs->encrypted) {
2115 return -EINVAL;
2116 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2117 return -ENOMEDIUM;
2118 }
aliguoric0f4ce72009-03-05 23:01:01 +00002119 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002120 if (ret < 0) {
2121 bs->valid_key = 0;
2122 } else if (!bs->valid_key) {
2123 bs->valid_key = 1;
2124 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002125 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002126 }
aliguoric0f4ce72009-03-05 23:01:01 +00002127 return ret;
bellardea2384d2004-08-01 21:59:26 +00002128}
2129
2130void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2131{
bellard19cb3732006-08-19 11:45:59 +00002132 if (!bs->drv) {
bellardea2384d2004-08-01 21:59:26 +00002133 buf[0] = '\0';
2134 } else {
2135 pstrcpy(buf, buf_size, bs->drv->format_name);
2136 }
2137}
2138
ths5fafdf22007-09-16 21:08:06 +00002139void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002140 void *opaque)
2141{
2142 BlockDriver *drv;
2143
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002144 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002145 it(opaque, drv->format_name);
2146 }
2147}
2148
bellardb3380822004-03-14 21:38:54 +00002149BlockDriverState *bdrv_find(const char *name)
2150{
2151 BlockDriverState *bs;
2152
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002153 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2154 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002155 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002156 }
bellardb3380822004-03-14 21:38:54 +00002157 }
2158 return NULL;
2159}
2160
Markus Armbruster2f399b02010-06-02 18:55:20 +02002161BlockDriverState *bdrv_next(BlockDriverState *bs)
2162{
2163 if (!bs) {
2164 return QTAILQ_FIRST(&bdrv_states);
2165 }
2166 return QTAILQ_NEXT(bs, list);
2167}
2168
aliguori51de9762009-03-05 23:00:43 +00002169void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002170{
2171 BlockDriverState *bs;
2172
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002173 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002174 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002175 }
2176}
2177
bellardea2384d2004-08-01 21:59:26 +00002178const char *bdrv_get_device_name(BlockDriverState *bs)
2179{
2180 return bs->device_name;
2181}
2182
aliguoric6ca28d2008-10-06 13:55:43 +00002183void bdrv_flush_all(void)
2184{
2185 BlockDriverState *bs;
2186
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002187 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Markus Armbrusterc602a482011-08-03 15:08:10 +02002188 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
aliguoric6ca28d2008-10-06 13:55:43 +00002189 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002190 }
2191 }
aliguoric6ca28d2008-10-06 13:55:43 +00002192}
2193
Kevin Wolff2feebb2010-04-14 17:30:35 +02002194int bdrv_has_zero_init(BlockDriverState *bs)
2195{
2196 assert(bs->drv);
2197
Kevin Wolf336c1c12010-07-28 11:26:29 +02002198 if (bs->drv->bdrv_has_zero_init) {
2199 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002200 }
2201
2202 return 1;
2203}
2204
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002205typedef struct BdrvCoIsAllocatedData {
2206 BlockDriverState *bs;
2207 int64_t sector_num;
2208 int nb_sectors;
2209 int *pnum;
2210 int ret;
2211 bool done;
2212} BdrvCoIsAllocatedData;
2213
thsf58c7b32008-06-05 21:53:49 +00002214/*
2215 * Returns true iff the specified sector is present in the disk image. Drivers
2216 * not implementing the functionality are assumed to not support backing files,
2217 * hence all their sectors are reported as allocated.
2218 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002219 * If 'sector_num' is beyond the end of the disk image the return value is 0
2220 * and 'pnum' is set to 0.
2221 *
thsf58c7b32008-06-05 21:53:49 +00002222 * 'pnum' is set to the number of sectors (including and immediately following
2223 * the specified sector) that are known to be in the same
2224 * allocated/unallocated state.
2225 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002226 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2227 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002228 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002229int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2230 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002231{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002232 int64_t n;
2233
2234 if (sector_num >= bs->total_sectors) {
2235 *pnum = 0;
2236 return 0;
2237 }
2238
2239 n = bs->total_sectors - sector_num;
2240 if (n < nb_sectors) {
2241 nb_sectors = n;
2242 }
2243
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002244 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002245 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002246 return 1;
2247 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002248
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002249 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2250}
2251
2252/* Coroutine wrapper for bdrv_is_allocated() */
2253static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2254{
2255 BdrvCoIsAllocatedData *data = opaque;
2256 BlockDriverState *bs = data->bs;
2257
2258 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2259 data->pnum);
2260 data->done = true;
2261}
2262
2263/*
2264 * Synchronous wrapper around bdrv_co_is_allocated().
2265 *
2266 * See bdrv_co_is_allocated() for details.
2267 */
2268int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2269 int *pnum)
2270{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002271 Coroutine *co;
2272 BdrvCoIsAllocatedData data = {
2273 .bs = bs,
2274 .sector_num = sector_num,
2275 .nb_sectors = nb_sectors,
2276 .pnum = pnum,
2277 .done = false,
2278 };
2279
2280 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2281 qemu_coroutine_enter(co, &data);
2282 while (!data.done) {
2283 qemu_aio_wait();
2284 }
2285 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002286}
2287
Luiz Capitulino2582bfe2010-02-03 12:41:01 -02002288void bdrv_mon_event(const BlockDriverState *bdrv,
2289 BlockMonEventAction action, int is_read)
2290{
2291 QObject *data;
2292 const char *action_str;
2293
2294 switch (action) {
2295 case BDRV_ACTION_REPORT:
2296 action_str = "report";
2297 break;
2298 case BDRV_ACTION_IGNORE:
2299 action_str = "ignore";
2300 break;
2301 case BDRV_ACTION_STOP:
2302 action_str = "stop";
2303 break;
2304 default:
2305 abort();
2306 }
2307
2308 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2309 bdrv->device_name,
2310 action_str,
2311 is_read ? "read" : "write");
2312 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
2313
2314 qobject_decref(data);
2315}
2316
Luiz Capitulinob2023812011-09-21 17:16:47 -03002317BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002318{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002319 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002320 BlockDriverState *bs;
2321
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002322 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002323 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002324
Luiz Capitulinob2023812011-09-21 17:16:47 -03002325 info->value = g_malloc0(sizeof(*info->value));
2326 info->value->device = g_strdup(bs->device_name);
2327 info->value->type = g_strdup("unknown");
2328 info->value->locked = bdrv_dev_is_medium_locked(bs);
2329 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002330
Markus Armbrustere4def802011-09-06 18:58:53 +02002331 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002332 info->value->has_tray_open = true;
2333 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002334 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002335
2336 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002337 info->value->has_io_status = true;
2338 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002339 }
2340
bellard19cb3732006-08-19 11:45:59 +00002341 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002342 info->value->has_inserted = true;
2343 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2344 info->value->inserted->file = g_strdup(bs->filename);
2345 info->value->inserted->ro = bs->read_only;
2346 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2347 info->value->inserted->encrypted = bs->encrypted;
2348 if (bs->backing_file[0]) {
2349 info->value->inserted->has_backing_file = true;
2350 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002351 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002352
2353 if (bs->io_limits_enabled) {
2354 info->value->inserted->bps =
2355 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2356 info->value->inserted->bps_rd =
2357 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2358 info->value->inserted->bps_wr =
2359 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2360 info->value->inserted->iops =
2361 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2362 info->value->inserted->iops_rd =
2363 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2364 info->value->inserted->iops_wr =
2365 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2366 }
bellardb3380822004-03-14 21:38:54 +00002367 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002368
2369 /* XXX: waiting for the qapi to support GSList */
2370 if (!cur_item) {
2371 head = cur_item = info;
2372 } else {
2373 cur_item->next = info;
2374 cur_item = info;
2375 }
bellardb3380822004-03-14 21:38:54 +00002376 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002377
Luiz Capitulinob2023812011-09-21 17:16:47 -03002378 return head;
bellardb3380822004-03-14 21:38:54 +00002379}
thsa36e69d2007-12-02 05:18:19 +00002380
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002381/* Consider exposing this as a full fledged QMP command */
2382static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002383{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002384 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002385
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002386 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002387
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002388 if (bs->device_name[0]) {
2389 s->has_device = true;
2390 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002391 }
2392
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002393 s->stats = g_malloc0(sizeof(*s->stats));
2394 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2395 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2396 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2397 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2398 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2399 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2400 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2401 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2402 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2403
Kevin Wolf294cc352010-04-28 14:34:01 +02002404 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002405 s->has_parent = true;
2406 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002407 }
2408
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002409 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002410}
2411
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002412BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002413{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002414 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002415 BlockDriverState *bs;
2416
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002417 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002418 BlockStatsList *info = g_malloc0(sizeof(*info));
2419 info->value = qmp_query_blockstat(bs, NULL);
2420
2421 /* XXX: waiting for the qapi to support GSList */
2422 if (!cur_item) {
2423 head = cur_item = info;
2424 } else {
2425 cur_item->next = info;
2426 cur_item = info;
2427 }
thsa36e69d2007-12-02 05:18:19 +00002428 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002429
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002430 return head;
thsa36e69d2007-12-02 05:18:19 +00002431}
bellardea2384d2004-08-01 21:59:26 +00002432
aliguori045df332009-03-05 23:00:48 +00002433const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2434{
2435 if (bs->backing_hd && bs->backing_hd->encrypted)
2436 return bs->backing_file;
2437 else if (bs->encrypted)
2438 return bs->filename;
2439 else
2440 return NULL;
2441}
2442
ths5fafdf22007-09-16 21:08:06 +00002443void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002444 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002445{
Kevin Wolf3574c602011-10-26 11:02:11 +02002446 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002447}
2448
ths5fafdf22007-09-16 21:08:06 +00002449int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002450 const uint8_t *buf, int nb_sectors)
2451{
2452 BlockDriver *drv = bs->drv;
2453 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002454 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002455 if (!drv->bdrv_write_compressed)
2456 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002457 if (bdrv_check_request(bs, sector_num, nb_sectors))
2458 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002459
Jan Kiszkac6d22832009-11-30 18:21:20 +01002460 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002461 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2462 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002463
bellardfaea38e2006-08-05 21:31:00 +00002464 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2465}
ths3b46e622007-09-17 08:09:54 +00002466
bellardfaea38e2006-08-05 21:31:00 +00002467int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2468{
2469 BlockDriver *drv = bs->drv;
2470 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002471 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002472 if (!drv->bdrv_get_info)
2473 return -ENOTSUP;
2474 memset(bdi, 0, sizeof(*bdi));
2475 return drv->bdrv_get_info(bs, bdi);
2476}
2477
Christoph Hellwig45566e92009-07-10 23:11:57 +02002478int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2479 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002480{
2481 BlockDriver *drv = bs->drv;
2482 if (!drv)
2483 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002484 if (drv->bdrv_save_vmstate)
2485 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2486 if (bs->file)
2487 return bdrv_save_vmstate(bs->file, buf, pos, size);
2488 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002489}
2490
Christoph Hellwig45566e92009-07-10 23:11:57 +02002491int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2492 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002493{
2494 BlockDriver *drv = bs->drv;
2495 if (!drv)
2496 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002497 if (drv->bdrv_load_vmstate)
2498 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2499 if (bs->file)
2500 return bdrv_load_vmstate(bs->file, buf, pos, size);
2501 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002502}
2503
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002504void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2505{
2506 BlockDriver *drv = bs->drv;
2507
2508 if (!drv || !drv->bdrv_debug_event) {
2509 return;
2510 }
2511
2512 return drv->bdrv_debug_event(bs, event);
2513
2514}
2515
bellardfaea38e2006-08-05 21:31:00 +00002516/**************************************************************/
2517/* handling of snapshots */
2518
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002519int bdrv_can_snapshot(BlockDriverState *bs)
2520{
2521 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002522 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002523 return 0;
2524 }
2525
2526 if (!drv->bdrv_snapshot_create) {
2527 if (bs->file != NULL) {
2528 return bdrv_can_snapshot(bs->file);
2529 }
2530 return 0;
2531 }
2532
2533 return 1;
2534}
2535
Blue Swirl199630b2010-07-25 20:49:34 +00002536int bdrv_is_snapshot(BlockDriverState *bs)
2537{
2538 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2539}
2540
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002541BlockDriverState *bdrv_snapshots(void)
2542{
2543 BlockDriverState *bs;
2544
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002545 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002546 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002547 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002548
2549 bs = NULL;
2550 while ((bs = bdrv_next(bs))) {
2551 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002552 bs_snapshots = bs;
2553 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002554 }
2555 }
2556 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002557}
2558
ths5fafdf22007-09-16 21:08:06 +00002559int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002560 QEMUSnapshotInfo *sn_info)
2561{
2562 BlockDriver *drv = bs->drv;
2563 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002564 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002565 if (drv->bdrv_snapshot_create)
2566 return drv->bdrv_snapshot_create(bs, sn_info);
2567 if (bs->file)
2568 return bdrv_snapshot_create(bs->file, sn_info);
2569 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002570}
2571
ths5fafdf22007-09-16 21:08:06 +00002572int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002573 const char *snapshot_id)
2574{
2575 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002576 int ret, open_ret;
2577
bellardfaea38e2006-08-05 21:31:00 +00002578 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002579 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002580 if (drv->bdrv_snapshot_goto)
2581 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2582
2583 if (bs->file) {
2584 drv->bdrv_close(bs);
2585 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2586 open_ret = drv->bdrv_open(bs, bs->open_flags);
2587 if (open_ret < 0) {
2588 bdrv_delete(bs->file);
2589 bs->drv = NULL;
2590 return open_ret;
2591 }
2592 return ret;
2593 }
2594
2595 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002596}
2597
2598int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2599{
2600 BlockDriver *drv = bs->drv;
2601 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002602 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002603 if (drv->bdrv_snapshot_delete)
2604 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2605 if (bs->file)
2606 return bdrv_snapshot_delete(bs->file, snapshot_id);
2607 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002608}
2609
ths5fafdf22007-09-16 21:08:06 +00002610int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00002611 QEMUSnapshotInfo **psn_info)
2612{
2613 BlockDriver *drv = bs->drv;
2614 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002615 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002616 if (drv->bdrv_snapshot_list)
2617 return drv->bdrv_snapshot_list(bs, psn_info);
2618 if (bs->file)
2619 return bdrv_snapshot_list(bs->file, psn_info);
2620 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00002621}
2622
edison51ef6722010-09-21 19:58:41 -07002623int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2624 const char *snapshot_name)
2625{
2626 BlockDriver *drv = bs->drv;
2627 if (!drv) {
2628 return -ENOMEDIUM;
2629 }
2630 if (!bs->read_only) {
2631 return -EINVAL;
2632 }
2633 if (drv->bdrv_snapshot_load_tmp) {
2634 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2635 }
2636 return -ENOTSUP;
2637}
2638
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00002639BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2640 const char *backing_file)
2641{
2642 if (!bs->drv) {
2643 return NULL;
2644 }
2645
2646 if (bs->backing_hd) {
2647 if (strcmp(bs->backing_file, backing_file) == 0) {
2648 return bs->backing_hd;
2649 } else {
2650 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2651 }
2652 }
2653
2654 return NULL;
2655}
2656
#define NB_SUFFIXES 4

/*
 * Format size into buf (at most buf_size bytes) as a short string.
 *
 * Values up to 999 are printed as a plain integer.  Larger values are
 * scaled by powers of 1024 and suffixed with K, M, G or T: one decimal
 * place while the scaled value is below 10, a rounded integer otherwise.
 * Values too large for the T range are still printed with the T suffix.
 *
 * Returns buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        /* The last suffix takes everything that is left */
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
    }

    return buf;
}
2686
2687char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2688{
2689 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00002690#ifdef _WIN32
2691 struct tm *ptm;
2692#else
bellardfaea38e2006-08-05 21:31:00 +00002693 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00002694#endif
bellardfaea38e2006-08-05 21:31:00 +00002695 time_t ti;
2696 int64_t secs;
2697
2698 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00002699 snprintf(buf, buf_size,
2700 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002701 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2702 } else {
2703 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00002704#ifdef _WIN32
2705 ptm = localtime(&ti);
2706 strftime(date_buf, sizeof(date_buf),
2707 "%Y-%m-%d %H:%M:%S", ptm);
2708#else
bellardfaea38e2006-08-05 21:31:00 +00002709 localtime_r(&ti, &tm);
2710 strftime(date_buf, sizeof(date_buf),
2711 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00002712#endif
bellardfaea38e2006-08-05 21:31:00 +00002713 secs = sn->vm_clock_nsec / 1000000000;
2714 snprintf(clock_buf, sizeof(clock_buf),
2715 "%02d:%02d:%02d.%03d",
2716 (int)(secs / 3600),
2717 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00002718 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00002719 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2720 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00002721 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00002722 sn->id_str, sn->name,
2723 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2724 date_buf,
2725 clock_buf);
2726 }
2727 return buf;
2728}
2729
bellard83f64092006-08-01 16:21:11 +00002730/**************************************************************/
2731/* async I/Os */
2732
aliguori3b69e4b2009-01-22 16:59:24 +00002733BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00002734 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00002735 BlockDriverCompletionFunc *cb, void *opaque)
2736{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002737 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2738
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01002739 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002740 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00002741}
2742
aliguorif141eaf2009-04-07 18:43:24 +00002743BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2744 QEMUIOVector *qiov, int nb_sectors,
2745 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00002746{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01002747 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2748
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01002749 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01002750 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00002751}
2752
Kevin Wolf40b4f532009-09-09 17:53:37 +02002753
2754typedef struct MultiwriteCB {
2755 int error;
2756 int num_requests;
2757 int num_callbacks;
2758 struct {
2759 BlockDriverCompletionFunc *cb;
2760 void *opaque;
2761 QEMUIOVector *free_qiov;
2762 void *free_buf;
2763 } callbacks[];
2764} MultiwriteCB;
2765
2766static void multiwrite_user_cb(MultiwriteCB *mcb)
2767{
2768 int i;
2769
2770 for (i = 0; i < mcb->num_callbacks; i++) {
2771 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01002772 if (mcb->callbacks[i].free_qiov) {
2773 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2774 }
Anthony Liguori7267c092011-08-20 22:09:37 -05002775 g_free(mcb->callbacks[i].free_qiov);
Herve Poussineauf8a83242010-01-24 21:23:56 +00002776 qemu_vfree(mcb->callbacks[i].free_buf);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002777 }
2778}
2779
2780static void multiwrite_cb(void *opaque, int ret)
2781{
2782 MultiwriteCB *mcb = opaque;
2783
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002784 trace_multiwrite_cb(mcb, ret);
2785
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02002786 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02002787 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002788 }
2789
2790 mcb->num_requests--;
2791 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02002792 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05002793 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002794 }
2795}
2796
2797static int multiwrite_req_compare(const void *a, const void *b)
2798{
Christoph Hellwig77be4362010-05-19 20:53:10 +02002799 const BlockRequest *req1 = a, *req2 = b;
2800
2801 /*
2802 * Note that we can't simply subtract req2->sector from req1->sector
2803 * here as that could overflow the return value.
2804 */
2805 if (req1->sector > req2->sector) {
2806 return 1;
2807 } else if (req1->sector < req2->sector) {
2808 return -1;
2809 } else {
2810 return 0;
2811 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02002812}
2813
2814/*
2815 * Takes a bunch of requests and tries to merge them. Returns the number of
2816 * requests that remain after merging.
2817 */
2818static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2819 int num_reqs, MultiwriteCB *mcb)
2820{
2821 int i, outidx;
2822
2823 // Sort requests by start sector
2824 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2825
2826 // Check if adjacent requests touch the same clusters. If so, combine them,
2827 // filling up gaps with zero sectors.
2828 outidx = 0;
2829 for (i = 1; i < num_reqs; i++) {
2830 int merge = 0;
2831 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2832
2833 // This handles the cases that are valid for all block drivers, namely
2834 // exactly sequential writes and overlapping writes.
2835 if (reqs[i].sector <= oldreq_last) {
2836 merge = 1;
2837 }
2838
2839 // The block driver may decide that it makes sense to combine requests
2840 // even if there is a gap of some sectors between them. In this case,
2841 // the gap is filled with zeros (therefore only applicable for yet
2842 // unused space in format like qcow2).
2843 if (!merge && bs->drv->bdrv_merge_requests) {
2844 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2845 }
2846
Christoph Hellwige2a305f2010-01-26 14:49:08 +01002847 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2848 merge = 0;
2849 }
2850
Kevin Wolf40b4f532009-09-09 17:53:37 +02002851 if (merge) {
2852 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05002853 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002854 qemu_iovec_init(qiov,
2855 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2856
2857 // Add the first request to the merged one. If the requests are
2858 // overlapping, drop the last sectors of the first request.
2859 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2860 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2861
2862 // We might need to add some zeros between the two requests
2863 if (reqs[i].sector > oldreq_last) {
2864 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2865 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2866 memset(buf, 0, zero_bytes);
2867 qemu_iovec_add(qiov, buf, zero_bytes);
2868 mcb->callbacks[i].free_buf = buf;
2869 }
2870
2871 // Add the second request
2872 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2873
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02002874 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002875 reqs[outidx].qiov = qiov;
2876
2877 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2878 } else {
2879 outidx++;
2880 reqs[outidx].sector = reqs[i].sector;
2881 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2882 reqs[outidx].qiov = reqs[i].qiov;
2883 }
2884 }
2885
2886 return outidx + 1;
2887}
2888
2889/*
2890 * Submit multiple AIO write requests at once.
2891 *
2892 * On success, the function returns 0 and all requests in the reqs array have
2893 * been submitted. In error case this function returns -1, and any of the
2894 * requests may or may not be submitted yet. In particular, this means that the
2895 * callback will be called for some of the requests, for others it won't. The
2896 * caller must check the error field of the BlockRequest to wait for the right
2897 * callbacks (if error != 0, no callback will be called).
2898 *
2899 * The implementation may modify the contents of the reqs array, e.g. to merge
2900 * requests. However, the fields opaque and error are left unmodified as they
2901 * are used to signal failure for a single request to the caller.
2902 */
2903int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2904{
Kevin Wolf40b4f532009-09-09 17:53:37 +02002905 MultiwriteCB *mcb;
2906 int i;
2907
Ryan Harper301db7c2011-03-07 10:01:04 -06002908 /* don't submit writes if we don't have a medium */
2909 if (bs->drv == NULL) {
2910 for (i = 0; i < num_reqs; i++) {
2911 reqs[i].error = -ENOMEDIUM;
2912 }
2913 return -1;
2914 }
2915
Kevin Wolf40b4f532009-09-09 17:53:37 +02002916 if (num_reqs == 0) {
2917 return 0;
2918 }
2919
2920 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05002921 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02002922 mcb->num_requests = 0;
2923 mcb->num_callbacks = num_reqs;
2924
2925 for (i = 0; i < num_reqs; i++) {
2926 mcb->callbacks[i].cb = reqs[i].cb;
2927 mcb->callbacks[i].opaque = reqs[i].opaque;
2928 }
2929
2930 // Check for mergable requests
2931 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2932
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01002933 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2934
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01002935 /* Run the aio requests. */
2936 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002937 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01002938 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02002939 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02002940 }
2941
2942 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02002943}
2944
bellard83f64092006-08-01 16:21:11 +00002945void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00002946{
aliguori6bbff9a2009-03-20 18:25:59 +00002947 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00002948}
2949
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002950/* block I/O throttling */
2951static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
2952 bool is_write, double elapsed_time, uint64_t *wait)
2953{
2954 uint64_t bps_limit = 0;
2955 double bytes_limit, bytes_base, bytes_res;
2956 double slice_time, wait_time;
2957
2958 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2959 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2960 } else if (bs->io_limits.bps[is_write]) {
2961 bps_limit = bs->io_limits.bps[is_write];
2962 } else {
2963 if (wait) {
2964 *wait = 0;
2965 }
2966
2967 return false;
2968 }
2969
2970 slice_time = bs->slice_end - bs->slice_start;
2971 slice_time /= (NANOSECONDS_PER_SECOND);
2972 bytes_limit = bps_limit * slice_time;
2973 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
2974 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
2975 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
2976 }
2977
2978 /* bytes_base: the bytes of data which have been read/written; and
2979 * it is obtained from the history statistic info.
2980 * bytes_res: the remaining bytes of data which need to be read/written.
2981 * (bytes_base + bytes_res) / bps_limit: used to calcuate
2982 * the total time for completing reading/writting all data.
2983 */
2984 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
2985
2986 if (bytes_base + bytes_res <= bytes_limit) {
2987 if (wait) {
2988 *wait = 0;
2989 }
2990
2991 return false;
2992 }
2993
2994 /* Calc approx time to dispatch */
2995 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
2996
2997 /* When the I/O rate at runtime exceeds the limits,
2998 * bs->slice_end need to be extended in order that the current statistic
2999 * info can be kept until the timer fire, so it is increased and tuned
3000 * based on the result of experiment.
3001 */
3002 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3003 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3004 if (wait) {
3005 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3006 }
3007
3008 return true;
3009}
3010
3011static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3012 double elapsed_time, uint64_t *wait)
3013{
3014 uint64_t iops_limit = 0;
3015 double ios_limit, ios_base;
3016 double slice_time, wait_time;
3017
3018 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3019 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3020 } else if (bs->io_limits.iops[is_write]) {
3021 iops_limit = bs->io_limits.iops[is_write];
3022 } else {
3023 if (wait) {
3024 *wait = 0;
3025 }
3026
3027 return false;
3028 }
3029
3030 slice_time = bs->slice_end - bs->slice_start;
3031 slice_time /= (NANOSECONDS_PER_SECOND);
3032 ios_limit = iops_limit * slice_time;
3033 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3034 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3035 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3036 }
3037
3038 if (ios_base + 1 <= ios_limit) {
3039 if (wait) {
3040 *wait = 0;
3041 }
3042
3043 return false;
3044 }
3045
3046 /* Calc approx time to dispatch */
3047 wait_time = (ios_base + 1) / iops_limit;
3048 if (wait_time > elapsed_time) {
3049 wait_time = wait_time - elapsed_time;
3050 } else {
3051 wait_time = 0;
3052 }
3053
3054 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3055 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3056 if (wait) {
3057 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3058 }
3059
3060 return true;
3061}
3062
3063static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3064 bool is_write, int64_t *wait)
3065{
3066 int64_t now, max_wait;
3067 uint64_t bps_wait = 0, iops_wait = 0;
3068 double elapsed_time;
3069 int bps_ret, iops_ret;
3070
3071 now = qemu_get_clock_ns(vm_clock);
3072 if ((bs->slice_start < now)
3073 && (bs->slice_end > now)) {
3074 bs->slice_end = now + bs->slice_time;
3075 } else {
3076 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3077 bs->slice_start = now;
3078 bs->slice_end = now + bs->slice_time;
3079
3080 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3081 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3082
3083 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3084 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3085 }
3086
3087 elapsed_time = now - bs->slice_start;
3088 elapsed_time /= (NANOSECONDS_PER_SECOND);
3089
3090 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3091 is_write, elapsed_time, &bps_wait);
3092 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3093 elapsed_time, &iops_wait);
3094 if (bps_ret || iops_ret) {
3095 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3096 if (wait) {
3097 *wait = max_wait;
3098 }
3099
3100 now = qemu_get_clock_ns(vm_clock);
3101 if (bs->slice_end < now + max_wait) {
3102 bs->slice_end = now + max_wait;
3103 }
3104
3105 return true;
3106 }
3107
3108 if (wait) {
3109 *wait = 0;
3110 }
3111
3112 return false;
3113}
pbrookce1a14d2006-08-07 02:38:06 +00003114
bellard83f64092006-08-01 16:21:11 +00003115/**************************************************************/
3116/* async block device emulation */
3117
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003118typedef struct BlockDriverAIOCBSync {
3119 BlockDriverAIOCB common;
3120 QEMUBH *bh;
3121 int ret;
3122 /* vector translation state */
3123 QEMUIOVector *qiov;
3124 uint8_t *bounce;
3125 int is_write;
3126} BlockDriverAIOCBSync;
3127
3128static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3129{
Kevin Wolfb666d232010-05-05 11:44:39 +02003130 BlockDriverAIOCBSync *acb =
3131 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003132 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003133 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003134 qemu_aio_release(acb);
3135}
3136
3137static AIOPool bdrv_em_aio_pool = {
3138 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3139 .cancel = bdrv_aio_cancel_em,
3140};
3141
bellard83f64092006-08-01 16:21:11 +00003142static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003143{
pbrookce1a14d2006-08-07 02:38:06 +00003144 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003145
aliguorif141eaf2009-04-07 18:43:24 +00003146 if (!acb->is_write)
3147 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003148 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003149 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003150 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003151 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003152 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003153}
bellardbeac80c2006-06-26 20:08:57 +00003154
aliguorif141eaf2009-04-07 18:43:24 +00003155static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3156 int64_t sector_num,
3157 QEMUIOVector *qiov,
3158 int nb_sectors,
3159 BlockDriverCompletionFunc *cb,
3160 void *opaque,
3161 int is_write)
3162
bellardea2384d2004-08-01 21:59:26 +00003163{
pbrookce1a14d2006-08-07 02:38:06 +00003164 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003165
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003166 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003167 acb->is_write = is_write;
3168 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003169 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003170 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003171
3172 if (is_write) {
3173 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003174 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003175 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003176 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003177 }
3178
pbrookce1a14d2006-08-07 02:38:06 +00003179 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003180
pbrookce1a14d2006-08-07 02:38:06 +00003181 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003182}
3183
aliguorif141eaf2009-04-07 18:43:24 +00003184static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3185 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003186 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003187{
aliguorif141eaf2009-04-07 18:43:24 +00003188 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003189}
3190
aliguorif141eaf2009-04-07 18:43:24 +00003191static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3192 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3193 BlockDriverCompletionFunc *cb, void *opaque)
3194{
3195 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3196}
3197
Kevin Wolf68485422011-06-30 10:05:46 +02003198
3199typedef struct BlockDriverAIOCBCoroutine {
3200 BlockDriverAIOCB common;
3201 BlockRequest req;
3202 bool is_write;
3203 QEMUBH* bh;
3204} BlockDriverAIOCBCoroutine;
3205
3206static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3207{
3208 qemu_aio_flush();
3209}
3210
3211static AIOPool bdrv_em_co_aio_pool = {
3212 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3213 .cancel = bdrv_aio_co_cancel_em,
3214};
3215
Paolo Bonzini35246a62011-10-14 10:41:29 +02003216static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003217{
3218 BlockDriverAIOCBCoroutine *acb = opaque;
3219
3220 acb->common.cb(acb->common.opaque, acb->req.error);
3221 qemu_bh_delete(acb->bh);
3222 qemu_aio_release(acb);
3223}
3224
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003225/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3226static void coroutine_fn bdrv_co_do_rw(void *opaque)
3227{
3228 BlockDriverAIOCBCoroutine *acb = opaque;
3229 BlockDriverState *bs = acb->common.bs;
3230
3231 if (!acb->is_write) {
3232 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003233 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003234 } else {
3235 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003236 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003237 }
3238
Paolo Bonzini35246a62011-10-14 10:41:29 +02003239 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003240 qemu_bh_schedule(acb->bh);
3241}
3242
Kevin Wolf68485422011-06-30 10:05:46 +02003243static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3244 int64_t sector_num,
3245 QEMUIOVector *qiov,
3246 int nb_sectors,
3247 BlockDriverCompletionFunc *cb,
3248 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003249 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003250{
3251 Coroutine *co;
3252 BlockDriverAIOCBCoroutine *acb;
3253
3254 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3255 acb->req.sector = sector_num;
3256 acb->req.nb_sectors = nb_sectors;
3257 acb->req.qiov = qiov;
3258 acb->is_write = is_write;
3259
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003260 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003261 qemu_coroutine_enter(co, acb);
3262
3263 return &acb->common;
3264}
3265
Paolo Bonzini07f07612011-10-17 12:32:12 +02003266static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003267{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003268 BlockDriverAIOCBCoroutine *acb = opaque;
3269 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003270
Paolo Bonzini07f07612011-10-17 12:32:12 +02003271 acb->req.error = bdrv_co_flush(bs);
3272 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003273 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003274}
3275
Paolo Bonzini07f07612011-10-17 12:32:12 +02003276BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003277 BlockDriverCompletionFunc *cb, void *opaque)
3278{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003279 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003280
Paolo Bonzini07f07612011-10-17 12:32:12 +02003281 Coroutine *co;
3282 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003283
Paolo Bonzini07f07612011-10-17 12:32:12 +02003284 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3285 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3286 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003287
Alexander Graf016f5cf2010-05-26 17:51:49 +02003288 return &acb->common;
3289}
3290
Paolo Bonzini4265d622011-10-17 12:32:14 +02003291static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3292{
3293 BlockDriverAIOCBCoroutine *acb = opaque;
3294 BlockDriverState *bs = acb->common.bs;
3295
3296 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3297 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3298 qemu_bh_schedule(acb->bh);
3299}
3300
3301BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3302 int64_t sector_num, int nb_sectors,
3303 BlockDriverCompletionFunc *cb, void *opaque)
3304{
3305 Coroutine *co;
3306 BlockDriverAIOCBCoroutine *acb;
3307
3308 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3309
3310 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3311 acb->req.sector = sector_num;
3312 acb->req.nb_sectors = nb_sectors;
3313 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3314 qemu_coroutine_enter(co, acb);
3315
3316 return &acb->common;
3317}
3318
bellardea2384d2004-08-01 21:59:26 +00003319void bdrv_init(void)
3320{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003321 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003322}
pbrookce1a14d2006-08-07 02:38:06 +00003323
Markus Armbrustereb852012009-10-27 18:41:44 +01003324void bdrv_init_with_whitelist(void)
3325{
3326 use_bdrv_whitelist = 1;
3327 bdrv_init();
3328}
3329
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003330void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3331 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003332{
pbrookce1a14d2006-08-07 02:38:06 +00003333 BlockDriverAIOCB *acb;
3334
aliguori6bbff9a2009-03-20 18:25:59 +00003335 if (pool->free_aiocb) {
3336 acb = pool->free_aiocb;
3337 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003338 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003339 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003340 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003341 }
3342 acb->bs = bs;
3343 acb->cb = cb;
3344 acb->opaque = opaque;
3345 return acb;
3346}
3347
3348void qemu_aio_release(void *p)
3349{
aliguori6bbff9a2009-03-20 18:25:59 +00003350 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3351 AIOPool *pool = acb->pool;
3352 acb->next = pool->free_aiocb;
3353 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003354}
bellard19cb3732006-08-19 11:45:59 +00003355
3356/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003357/* Coroutine block device emulation */
3358
3359typedef struct CoroutineIOCompletion {
3360 Coroutine *coroutine;
3361 int ret;
3362} CoroutineIOCompletion;
3363
3364static void bdrv_co_io_em_complete(void *opaque, int ret)
3365{
3366 CoroutineIOCompletion *co = opaque;
3367
3368 co->ret = ret;
3369 qemu_coroutine_enter(co->coroutine, NULL);
3370}
3371
3372static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3373 int nb_sectors, QEMUIOVector *iov,
3374 bool is_write)
3375{
3376 CoroutineIOCompletion co = {
3377 .coroutine = qemu_coroutine_self(),
3378 };
3379 BlockDriverAIOCB *acb;
3380
3381 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003382 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3383 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003384 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003385 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3386 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003387 }
3388
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003389 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003390 if (!acb) {
3391 return -EIO;
3392 }
3393 qemu_coroutine_yield();
3394
3395 return co.ret;
3396}
3397
3398static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3399 int64_t sector_num, int nb_sectors,
3400 QEMUIOVector *iov)
3401{
3402 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3403}
3404
3405static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3406 int64_t sector_num, int nb_sectors,
3407 QEMUIOVector *iov)
3408{
3409 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3410}
3411
Paolo Bonzini07f07612011-10-17 12:32:12 +02003412static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003413{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003414 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003415
Paolo Bonzini07f07612011-10-17 12:32:12 +02003416 rwco->ret = bdrv_co_flush(rwco->bs);
3417}
3418
3419int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3420{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003421 int ret;
3422
Kevin Wolfca716362011-11-10 18:13:59 +01003423 if (!bs->drv) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003424 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003425 }
3426
Kevin Wolfca716362011-11-10 18:13:59 +01003427 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003428 if (bs->drv->bdrv_co_flush_to_os) {
3429 ret = bs->drv->bdrv_co_flush_to_os(bs);
3430 if (ret < 0) {
3431 return ret;
3432 }
3433 }
3434
Kevin Wolfca716362011-11-10 18:13:59 +01003435 /* But don't actually force it to the disk with cache=unsafe */
3436 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3437 return 0;
3438 }
3439
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003440 if (bs->drv->bdrv_co_flush_to_disk) {
Kevin Wolfc68b89a2011-11-10 17:25:44 +01003441 return bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003442 } else if (bs->drv->bdrv_aio_flush) {
3443 BlockDriverAIOCB *acb;
3444 CoroutineIOCompletion co = {
3445 .coroutine = qemu_coroutine_self(),
3446 };
3447
3448 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3449 if (acb == NULL) {
3450 return -EIO;
3451 } else {
3452 qemu_coroutine_yield();
3453 return co.ret;
3454 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003455 } else {
3456 /*
3457 * Some block drivers always operate in either writethrough or unsafe
3458 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3459 * know how the server works (because the behaviour is hardcoded or
3460 * depends on server-side configuration), so we can't ensure that
3461 * everything is safe on disk. Returning an error doesn't work because
3462 * that would break guests even if the server operates in writethrough
3463 * mode.
3464 *
3465 * Let's hope the user knows what he's doing.
3466 */
3467 return 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003468 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003469}
3470
Anthony Liguori0f154232011-11-14 15:09:45 -06003471void bdrv_invalidate_cache(BlockDriverState *bs)
3472{
3473 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3474 bs->drv->bdrv_invalidate_cache(bs);
3475 }
3476}
3477
3478void bdrv_invalidate_cache_all(void)
3479{
3480 BlockDriverState *bs;
3481
3482 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3483 bdrv_invalidate_cache(bs);
3484 }
3485}
3486
Paolo Bonzini07f07612011-10-17 12:32:12 +02003487int bdrv_flush(BlockDriverState *bs)
3488{
3489 Coroutine *co;
3490 RwCo rwco = {
3491 .bs = bs,
3492 .ret = NOT_DONE,
3493 };
3494
3495 if (qemu_in_coroutine()) {
3496 /* Fast-path if already in coroutine context */
3497 bdrv_flush_co_entry(&rwco);
3498 } else {
3499 co = qemu_coroutine_create(bdrv_flush_co_entry);
3500 qemu_coroutine_enter(co, &rwco);
3501 while (rwco.ret == NOT_DONE) {
3502 qemu_aio_wait();
3503 }
3504 }
3505
3506 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003507}
3508
Paolo Bonzini4265d622011-10-17 12:32:14 +02003509static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3510{
3511 RwCo *rwco = opaque;
3512
3513 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3514}
3515
3516int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3517 int nb_sectors)
3518{
3519 if (!bs->drv) {
3520 return -ENOMEDIUM;
3521 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3522 return -EIO;
3523 } else if (bs->read_only) {
3524 return -EROFS;
3525 } else if (bs->drv->bdrv_co_discard) {
3526 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3527 } else if (bs->drv->bdrv_aio_discard) {
3528 BlockDriverAIOCB *acb;
3529 CoroutineIOCompletion co = {
3530 .coroutine = qemu_coroutine_self(),
3531 };
3532
3533 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3534 bdrv_co_io_em_complete, &co);
3535 if (acb == NULL) {
3536 return -EIO;
3537 } else {
3538 qemu_coroutine_yield();
3539 return co.ret;
3540 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02003541 } else {
3542 return 0;
3543 }
3544}
3545
3546int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3547{
3548 Coroutine *co;
3549 RwCo rwco = {
3550 .bs = bs,
3551 .sector_num = sector_num,
3552 .nb_sectors = nb_sectors,
3553 .ret = NOT_DONE,
3554 };
3555
3556 if (qemu_in_coroutine()) {
3557 /* Fast-path if already in coroutine context */
3558 bdrv_discard_co_entry(&rwco);
3559 } else {
3560 co = qemu_coroutine_create(bdrv_discard_co_entry);
3561 qemu_coroutine_enter(co, &rwco);
3562 while (rwco.ret == NOT_DONE) {
3563 qemu_aio_wait();
3564 }
3565 }
3566
3567 return rwco.ret;
3568}
3569
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003570/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00003571/* removable device support */
3572
3573/**
3574 * Return TRUE if the media is present
3575 */
3576int bdrv_is_inserted(BlockDriverState *bs)
3577{
3578 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003579
bellard19cb3732006-08-19 11:45:59 +00003580 if (!drv)
3581 return 0;
3582 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02003583 return 1;
3584 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00003585}
3586
3587/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003588 * Return whether the media changed since the last call to this
3589 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00003590 */
3591int bdrv_media_changed(BlockDriverState *bs)
3592{
3593 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003594
Markus Armbruster8e49ca42011-08-03 15:08:08 +02003595 if (drv && drv->bdrv_media_changed) {
3596 return drv->bdrv_media_changed(bs);
3597 }
3598 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00003599}
3600
3601/**
3602 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3603 */
Markus Armbrusterfdec4402011-09-06 18:58:45 +02003604void bdrv_eject(BlockDriverState *bs, int eject_flag)
bellard19cb3732006-08-19 11:45:59 +00003605{
3606 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00003607
Markus Armbruster822e1cd2011-07-20 18:23:42 +02003608 if (drv && drv->bdrv_eject) {
3609 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00003610 }
bellard19cb3732006-08-19 11:45:59 +00003611}
3612
bellard19cb3732006-08-19 11:45:59 +00003613/**
3614 * Lock or unlock the media (if it is locked, the user won't be able
3615 * to eject it manually).
3616 */
Markus Armbruster025e8492011-09-06 18:58:47 +02003617void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00003618{
3619 BlockDriver *drv = bs->drv;
3620
Markus Armbruster025e8492011-09-06 18:58:47 +02003621 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01003622
Markus Armbruster025e8492011-09-06 18:58:47 +02003623 if (drv && drv->bdrv_lock_medium) {
3624 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00003625 }
3626}
ths985a03b2007-12-24 16:10:43 +00003627
3628/* needed for generic scsi interface */
3629
3630int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3631{
3632 BlockDriver *drv = bs->drv;
3633
3634 if (drv && drv->bdrv_ioctl)
3635 return drv->bdrv_ioctl(bs, req, buf);
3636 return -ENOTSUP;
3637}
aliguori7d780662009-03-12 19:57:08 +00003638
aliguori221f7152009-03-28 17:28:41 +00003639BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3640 unsigned long int req, void *buf,
3641 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00003642{
aliguori221f7152009-03-28 17:28:41 +00003643 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00003644
aliguori221f7152009-03-28 17:28:41 +00003645 if (drv && drv->bdrv_aio_ioctl)
3646 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3647 return NULL;
aliguori7d780662009-03-12 19:57:08 +00003648}
aliguorie268ca52009-04-22 20:20:00 +00003649
Markus Armbruster7b6f9302011-09-06 18:58:56 +02003650void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3651{
3652 bs->buffer_alignment = align;
3653}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003654
aliguorie268ca52009-04-22 20:20:00 +00003655void *qemu_blockalign(BlockDriverState *bs, size_t size)
3656{
3657 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3658}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003659
3660void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3661{
3662 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003663
Liran Schouraaa0eb72010-01-26 10:31:48 +02003664 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003665 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003666 if (!bs->dirty_bitmap) {
3667 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3668 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3669 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003670
Anthony Liguori7267c092011-08-20 22:09:37 -05003671 bs->dirty_bitmap = g_malloc0(bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003672 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003673 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01003674 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05003675 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01003676 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003677 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003678 }
3679}
3680
3681int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3682{
Jan Kiszka6ea44302009-11-30 18:21:19 +01003683 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003684
Jan Kiszkac6d22832009-11-30 18:21:20 +01003685 if (bs->dirty_bitmap &&
3686 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02003687 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3688 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003689 } else {
3690 return 0;
3691 }
3692}
3693
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003694void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3695 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02003696{
3697 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3698}
Liran Schouraaa0eb72010-01-26 10:31:48 +02003699
3700int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3701{
3702 return bs->dirty_count;
3703}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003704
Marcelo Tosattidb593f22011-01-26 12:12:34 -02003705void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3706{
3707 assert(bs->in_use != in_use);
3708 bs->in_use = in_use;
3709}
3710
3711int bdrv_in_use(BlockDriverState *bs)
3712{
3713 return bs->in_use;
3714}
3715
Luiz Capitulino28a72822011-09-26 17:43:50 -03003716void bdrv_iostatus_enable(BlockDriverState *bs)
3717{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003718 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003719 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003720}
3721
3722/* The I/O status is only enabled if the drive explicitly
3723 * enables it _and_ the VM is configured to stop on errors */
3724bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3725{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003726 return (bs->iostatus_enabled &&
Luiz Capitulino28a72822011-09-26 17:43:50 -03003727 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3728 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3729 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3730}
3731
3732void bdrv_iostatus_disable(BlockDriverState *bs)
3733{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03003734 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003735}
3736
3737void bdrv_iostatus_reset(BlockDriverState *bs)
3738{
3739 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003740 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003741 }
3742}
3743
3744/* XXX: Today this is set by device models because it makes the implementation
3745 quite simple. However, the block layer knows about the error, so it's
3746 possible to implement this without device models being involved */
3747void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3748{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003749 if (bdrv_iostatus_is_enabled(bs) &&
3750 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03003751 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03003752 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3753 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03003754 }
3755}
3756
Christoph Hellwiga597e792011-08-25 08:26:01 +02003757void
3758bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3759 enum BlockAcctType type)
3760{
3761 assert(type < BDRV_MAX_IOTYPE);
3762
3763 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003764 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02003765 cookie->type = type;
3766}
3767
3768void
3769bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3770{
3771 assert(cookie->type < BDRV_MAX_IOTYPE);
3772
3773 bs->nr_bytes[cookie->type] += cookie->bytes;
3774 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02003775 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02003776}
3777
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003778int bdrv_img_create(const char *filename, const char *fmt,
3779 const char *base_filename, const char *base_fmt,
3780 char *options, uint64_t img_size, int flags)
3781{
3782 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02003783 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003784 BlockDriverState *bs = NULL;
3785 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003786 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003787 int ret = 0;
3788
3789 /* Find driver and parse its options */
3790 drv = bdrv_find_format(fmt);
3791 if (!drv) {
3792 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003793 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003794 goto out;
3795 }
3796
3797 proto_drv = bdrv_find_protocol(filename);
3798 if (!proto_drv) {
3799 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003800 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003801 goto out;
3802 }
3803
3804 create_options = append_option_parameters(create_options,
3805 drv->create_options);
3806 create_options = append_option_parameters(create_options,
3807 proto_drv->create_options);
3808
3809 /* Create parameter list with default values */
3810 param = parse_option_parameters("", create_options, param);
3811
3812 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3813
3814 /* Parse -o options */
3815 if (options) {
3816 param = parse_option_parameters(options, create_options, param);
3817 if (param == NULL) {
3818 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003819 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003820 goto out;
3821 }
3822 }
3823
3824 if (base_filename) {
3825 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3826 base_filename)) {
3827 error_report("Backing file not supported for file format '%s'",
3828 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003829 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003830 goto out;
3831 }
3832 }
3833
3834 if (base_fmt) {
3835 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3836 error_report("Backing file format not supported for file "
3837 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003838 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003839 goto out;
3840 }
3841 }
3842
Jes Sorensen792da932010-12-16 13:52:17 +01003843 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3844 if (backing_file && backing_file->value.s) {
3845 if (!strcmp(filename, backing_file->value.s)) {
3846 error_report("Error: Trying to create an image with the "
3847 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003848 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01003849 goto out;
3850 }
3851 }
3852
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003853 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3854 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003855 backing_drv = bdrv_find_format(backing_fmt->value.s);
3856 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003857 error_report("Unknown backing file format '%s'",
3858 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01003859 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003860 goto out;
3861 }
3862 }
3863
3864 // The size for the image must always be specified, with one exception:
3865 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02003866 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3867 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003868 if (backing_file && backing_file->value.s) {
3869 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003870 char buf[32];
3871
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003872 bs = bdrv_new("");
3873
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003874 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003875 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00003876 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003877 goto out;
3878 }
3879 bdrv_get_geometry(bs, &size);
3880 size *= 512;
3881
3882 snprintf(buf, sizeof(buf), "%" PRId64, size);
3883 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3884 } else {
3885 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01003886 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003887 goto out;
3888 }
3889 }
3890
3891 printf("Formatting '%s', fmt=%s ", filename, fmt);
3892 print_option_parameters(param);
3893 puts("");
3894
3895 ret = bdrv_create(drv, filename, param);
3896
3897 if (ret < 0) {
3898 if (ret == -ENOTSUP) {
3899 error_report("Formatting or formatting option not supported for "
3900 "file format '%s'", fmt);
3901 } else if (ret == -EFBIG) {
3902 error_report("The image size is too large for file format '%s'",
3903 fmt);
3904 } else {
3905 error_report("%s: error while creating %s: %s", filename, fmt,
3906 strerror(-ret));
3907 }
3908 }
3909
3910out:
3911 free_option_parameters(create_options);
3912 free_option_parameters(param);
3913
3914 if (bs) {
3915 bdrv_delete(bs);
3916 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01003917
3918 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01003919}
Stefan Hajnoczieeec61f2012-01-18 14:40:43 +00003920
3921void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
3922 BlockDriverCompletionFunc *cb, void *opaque)
3923{
3924 BlockJob *job;
3925
3926 if (bs->job || bdrv_in_use(bs)) {
3927 return NULL;
3928 }
3929 bdrv_set_in_use(bs, 1);
3930
3931 job = g_malloc0(job_type->instance_size);
3932 job->job_type = job_type;
3933 job->bs = bs;
3934 job->cb = cb;
3935 job->opaque = opaque;
3936 bs->job = job;
3937 return job;
3938}
3939
3940void block_job_complete(BlockJob *job, int ret)
3941{
3942 BlockDriverState *bs = job->bs;
3943
3944 assert(bs->job == job);
3945 job->cb(job->opaque, ret);
3946 bs->job = NULL;
3947 g_free(job);
3948 bdrv_set_in_use(bs, 0);
3949}
3950
3951int block_job_set_speed(BlockJob *job, int64_t value)
3952{
3953 if (!job->job_type->set_speed) {
3954 return -ENOTSUP;
3955 }
3956 return job->job_type->set_speed(job, value);
3957}
3958
3959void block_job_cancel(BlockJob *job)
3960{
3961 job->cancelled = true;
3962}
3963
3964bool block_job_is_cancelled(BlockJob *job)
3965{
3966 return job->cancelled;
3967}