/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "blockjob.h"
#include "module.h"
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
    BDRV_REQ_ZERO_WRITE   = 0x2,
} BdrvRequestFlags;

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end   = 0;
    bs->slice_time  = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}

static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end   = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}

bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
         || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
         || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
         || io_limits->iops[BLOCK_IO_LIMIT_READ]
         || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
         || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}

static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* Requests are kept in FIFO order: the next throttled request is not
     * dequeued until the current request has been allowed to proceed. If the
     * current request still exceeds the limits, it is re-inserted at the head
     * of the queue, so all requests behind it remain queued in throttled_reqs.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}

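/*
 * Illustrative sketch (not part of the original source): an I/O path that
 * honours these limits would call bdrv_io_limits_intercept() before issuing
 * the request, roughly as follows; everything except the helpers defined
 * above is hypothetical caller-side code:
 *
 *     if (bs->io_limits_enabled) {
 *         bdrv_io_limits_intercept(bs, is_write, nb_sectors);
 *     }
 *     // ...issue the actual read or write once the throttle admits it...
 */
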
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

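/*
 * Illustrative examples (not part of the original source): with the rules
 * above, "nbd:localhost:10809" and "file:/tmp/disk.img" are treated as having
 * a protocol prefix, while "/tmp/disk.img", "relative.img" and (on Windows)
 * "c:\disk.img" are not.
 */
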
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

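/*
 * Illustrative example (not part of the original source): combining a
 * base_path of "/images/base.qcow2" with a filename of "backing.qcow2"
 * yields "/images/backing.qcow2"; an absolute filename is copied unchanged.
 */
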
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
        pstrcpy(dest, sz, bs->backing_file);
    } else {
        path_combine(dest, sz, bs->filename, bs->backing_file);
    }
}

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

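/*
 * Note (added for clarity, not in the original source): format and protocol
 * drivers are expected to call bdrv_register() from their module init
 * function, e.g. via the block_init() hook, so that they appear in
 * bdrv_drivers before any image is opened.
 */
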
/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QEMUOptionParameter *options;
    int ret;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    assert(cco->drv);

    cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
    };

    if (!drv->bdrv_create) {
        return -ENOTSUP;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;
    g_free(cco.filename);

    return ret;
}

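/*
 * Illustrative sketch (not part of the original source): creating an image
 * through a specific driver typically looks like the following; the file
 * name is hypothetical and the option names come from the driver's
 * create_options table:
 *
 *     BlockDriver *drv = bdrv_find_format("qcow2");
 *     QEMUOptionParameter *opts =
 *         parse_option_parameters("", drv->create_options, NULL);
 *     set_option_parameter_int(opts, BLOCK_OPT_SIZE, 1024 * 1024);
 *     int ret = bdrv_create(drv, "/tmp/test.qcow2", opts);
 *     free_option_parameters(opts);
 */
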
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

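/*
 * Illustrative example (not part of the original source): parsing a
 * command-line cache mode before opening an image:
 *
 *     int flags = BDRV_O_RDWR;
 *     if (bdrv_parse_cache_flags("none", &flags) < 0) {
 *         return -EINVAL;              // unknown cache mode
 *     }
 *     // "none" sets BDRV_O_NOCACHE | BDRV_O_CACHE_WB in flags
 */
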
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);
    assert(bs->file == NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    bs->open_flags = flags;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
    open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}

/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
        if (ret < 0) {
            return ret;
        }

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    if (flags & BDRV_O_RDWR) {
        flags |= BDRV_O_ALLOW_RDWR;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");
        bdrv_get_full_backing_filename(bs, backing_filename,
                                       sizeof(backing_filename));

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}

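/*
 * Illustrative sketch (not part of the original source): a typical way to
 * open an image with format probing and a writable top layer; the device
 * name and path are hypothetical:
 *
 *     BlockDriverState *bs = bdrv_new("drive0");
 *     int ret = bdrv_open(bs, "/tmp/disk.qcow2", BDRV_O_RDWR, NULL);
 *     if (ret < 0) {
 *         bdrv_delete(bs);
 *     }
 *
 * Passing NULL for drv triggers find_image_format(); the backing chain, if
 * any, is opened read-only as shown above.
 */
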
typedef struct BlockReopenQueueEntry {
    bool prepared;
    BDRVReopenState state;
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;

/*
 * Adds a BlockDriverState to a simple queue for an atomic, transactional
 * reopen of multiple devices.
 *
 * bs_queue can either be an existing BlockReopenQueue that has had
 * QSIMPLEQ_INIT already performed, or it may be NULL, in which case a new
 * BlockReopenQueue will be created and initialized. This newly created
 * BlockReopenQueue should be passed back in for subsequent calls that are
 * intended to be of the same atomic 'set'.
 *
 * bs is the BlockDriverState to add to the reopen queue.
 *
 * flags contains the open flags for the associated bs
 *
 * returns a pointer to bs_queue, which is either the newly allocated
 * bs_queue, or the existing bs_queue being used.
 *
 */
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
                                    BlockDriverState *bs, int flags)
{
    assert(bs != NULL);

    BlockReopenQueueEntry *bs_entry;
    if (bs_queue == NULL) {
        bs_queue = g_new0(BlockReopenQueue, 1);
        QSIMPLEQ_INIT(bs_queue);
    }

    if (bs->file) {
        bdrv_reopen_queue(bs_queue, bs->file, flags);
    }

    bs_entry = g_new0(BlockReopenQueueEntry, 1);
    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);

    bs_entry->state.bs = bs;
    bs_entry->state.flags = flags;

    return bs_queue;
}

/*
 * Reopen multiple BlockDriverStates atomically & transactionally.
 *
 * The queue passed in (bs_queue) must have been built up previously
 * via bdrv_reopen_queue().
 *
 * Reopens all BDS specified in the queue, with the appropriate
 * flags. All devices are prepared for reopen, and failure of any
 * device will cause all device changes to be abandoned, and intermediate
 * data cleaned up.
 *
 * If all devices prepare successfully, then the changes are committed
 * to all devices.
 *
 */
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
    int ret = -1;
    BlockReopenQueueEntry *bs_entry, *next;
    Error *local_err = NULL;

    assert(bs_queue != NULL);

    bdrv_drain_all();

    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
            error_propagate(errp, local_err);
            goto cleanup;
        }
        bs_entry->prepared = true;
    }

    /* If we reach this point, we have success and just need to apply the
     * changes
     */
    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        bdrv_reopen_commit(&bs_entry->state);
    }

    ret = 0;

cleanup:
    QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
        if (ret && bs_entry->prepared) {
            bdrv_reopen_abort(&bs_entry->state);
        }
        g_free(bs_entry);
    }
    g_free(bs_queue);
    return ret;
}

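/*
 * Illustrative sketch (not part of the original source): switching several
 * devices to read/write in one transaction; bs_a, bs_b and local_err are
 * hypothetical caller-side variables:
 *
 *     BlockReopenQueue *queue = NULL;
 *     queue = bdrv_reopen_queue(queue, bs_a, bs_a->open_flags | BDRV_O_RDWR);
 *     queue = bdrv_reopen_queue(queue, bs_b, bs_b->open_flags | BDRV_O_RDWR);
 *     int ret = bdrv_reopen_multiple(queue, &local_err);  // frees the queue
 */
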

/* Reopen a single BlockDriverState with the specified flags. */
int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);

    ret = bdrv_reopen_multiple(queue, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
    }
    return ret;
}


/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * bs is the BlockDriverState to reopen
 * flags are the new open flags
 * queue is the reopen queue
 *
 * Returns 0 on success, non-zero on error. On error errp will be set
 * as well.
 *
 * On failure, bdrv_reopen_abort() will be called to clean up any data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockDriver *drv;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* if we are to stay read-only, do not allow permission change
     * to r/w */
    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
        reopen_state->flags & BDRV_O_RDWR) {
        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
                  reopen_state->bs->device_name);
        goto error;
    }


    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
                  strerror(-ret));
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                error_set(errp, QERR_OPEN_FILE_FAILED,
                          reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
                  drv->format_name, reopen_state->bs->device_name,
                  "reopening of file");
        ret = -1;
        goto error;
    }

    ret = 0;

error:
    return ret;
}

/*
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
 * makes them final by swapping the staging BlockDriverState contents into
 * the active BlockDriverState contents.
 */
void bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    /* If there are any driver level actions to take */
    if (drv->bdrv_reopen_commit) {
        drv->bdrv_reopen_commit(reopen_state);
    }

    /* set BDS specific flags now */
    reopen_state->bs->open_flags         = reopen_state->flags;
    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
                                              BDRV_O_CACHE_WB);
    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
}

/*
 * Abort the reopen, and delete and free the staged changes in
 * reopen_state
 */
void bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    if (drv->bdrv_reopen_abort) {
        drv->bdrv_reopen_abort(reopen_state);
    }
}


void bdrv_close(BlockDriverState *bs)
{
    bdrv_flush(bs);
    if (bs->drv) {
        if (bs->job) {
            block_job_cancel_sync(bs->job);
        }
        bdrv_drain_all();

        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->growable = 0;

        if (bs->file != NULL) {
            bdrv_delete(bs->file);
            bs->file = NULL;
        }
    }

    bdrv_dev_change_media_cb(bs, false);

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete. Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;
    bool busy;

    do {
        busy = qemu_aio_wait();

        /* FIXME: We do not have timer support here, so this is effectively
         * a busy wait.
         */
        QTAILQ_FOREACH(bs, &bdrv_states, list) {
            if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
                qemu_co_queue_restart_all(&bs->throttled_reqs);
                busy = true;
            }
        }
    } while (busy);

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}

/* make a BlockDriverState anonymous by removing from bdrv_state list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

static void bdrv_rebind(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->bdrv_rebind) {
        bs->drv->bdrv_rebind(bs);
    }
}

static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */
    bs_dest->open_flags         = bs_src->open_flags;

    /* dev info */
    bs_dest->dev_ops            = bs_src->dev_ops;
    bs_dest->dev_opaque         = bs_src->dev_opaque;
    bs_dest->dev                = bs_src->dev;
    bs_dest->buffer_alignment   = bs_src->buffer_alignment;
    bs_dest->copy_on_read       = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o timing parameters */
    bs_dest->slice_time         = bs_src->slice_time;
    bs_dest->slice_start        = bs_src->slice_start;
    bs_dest->slice_end          = bs_src->slice_end;
    bs_dest->io_limits          = bs_src->io_limits;
    bs_dest->io_base            = bs_src->io_base;
    bs_dest->throttled_reqs     = bs_src->throttled_reqs;
    bs_dest->block_timer        = bs_src->block_timer;
    bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error      = bs_src->on_read_error;
    bs_dest->on_write_error     = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
    bs_dest->iostatus           = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_count        = bs_src->dirty_count;
    bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;

    /* job */
    bs_dest->in_use             = bs_src->in_use;
    bs_dest->job                = bs_src->job;

    /* keep the same entry in bdrv_states */
    pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
            bs_src->device_name);
    bs_dest->list = bs_src->list;
}

/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
    assert(bs_new->device_name[0] == '\0');
    assert(bs_new->dirty_bitmap == NULL);
    assert(bs_new->job == NULL);
    assert(bs_new->dev == NULL);
    assert(bs_new->in_use == 0);
    assert(bs_new->io_limits_enabled == false);
    assert(bs_new->block_timer == NULL);

    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new shouldn't be in bdrv_states even after the swap!  */
    assert(bs_new->device_name[0] == '\0');

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->dev == NULL);
    assert(bs_new->job == NULL);
    assert(bs_new->in_use == 0);
    assert(bs_new->io_limits_enabled == false);
    assert(bs_new->block_timer == NULL);

    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}

/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    bdrv_swap(bs_new, bs_top);

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    bs_top->backing_hd = bs_new;
    bs_top->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
            bs_new->filename);
    pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
            bs_new->drv ? bs_new->drv->format_name : "");
}

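/*
 * Note (added for clarity, not in the original source): bdrv_append() is the
 * primitive behind live snapshot creation. After the call, the guest keeps
 * using its existing BlockDriverState (bs_top), which now presents the new
 * overlay image, while bs_new takes over the previous top image and serves
 * as its backing file.
 */
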
void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);
    assert(!bs->job);
    assert(!bs->in_use);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);

    assert(bs != bs_snapshots);
    g_free(bs);
}

int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}

void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
                               BlockErrorAction action, bool is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}

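/*
 * Illustrative example (not part of the original source): for a failed write
 * on a hypothetical device "ide0-hd0" with the "stop" policy, the monitor
 * event emitted above would look roughly like:
 *
 *     { "event": "BLOCK_IO_ERROR",
 *       "data": { "device": "ide0-hd0", "action": "stop",
 *                 "operation": "write" } }
 */
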
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001418static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
1419{
1420 QObject *data;
1421
1422 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1423 bdrv_get_device_name(bs), ejected);
1424 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
1425
1426 qobject_decref(data);
1427}
1428
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001429static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02001430{
Markus Armbruster145feb12011-08-03 15:07:42 +02001431 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001432 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001433 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02001434 if (tray_was_closed) {
1435 /* tray open */
1436 bdrv_emit_qmp_eject_event(bs, true);
1437 }
1438 if (load) {
1439 /* tray close */
1440 bdrv_emit_qmp_eject_event(bs, false);
1441 }
Markus Armbruster145feb12011-08-03 15:07:42 +02001442 }
1443}
1444
Markus Armbruster2c6942f2011-09-06 18:58:51 +02001445bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1446{
1447 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1448}
1449
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01001450void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1451{
1452 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1453 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1454 }
1455}
1456
Markus Armbrustere4def802011-09-06 18:58:53 +02001457bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1458{
1459 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1460 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1461 }
1462 return false;
1463}
1464
Markus Armbruster145feb12011-08-03 15:07:42 +02001465static void bdrv_dev_resize_cb(BlockDriverState *bs)
1466{
1467 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1468 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02001469 }
1470}
1471
Markus Armbrusterf1076392011-09-06 18:58:46 +02001472bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1473{
1474 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1475 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1476 }
1477 return false;
1478}
1479
aliguorie97fc192009-04-21 23:11:50 +00001480/*
1481 * Run consistency checks on an image
1482 *
Kevin Wolfe076f332010-06-29 11:43:13 +02001483 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02001484 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02001485 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00001486 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02001487int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00001488{
1489 if (bs->drv->bdrv_check == NULL) {
1490 return -ENOTSUP;
1491 }
1492
Kevin Wolfe076f332010-06-29 11:43:13 +02001493 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02001494 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00001495}
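
/* Usage sketch (caller code assumed, in the spirit of qemu-img check): run
 * the check without repairing anything and inspect the counters that
 * bdrv_check() fills in:
 *
 *   BdrvCheckResult result;
 *   int ret = bdrv_check(bs, &result, 0);    // fix == 0: report only
 *
 *   if (ret < 0) {
 *       // internal error, counters are not meaningful
 *   } else if (result.corruptions || result.check_errors) {
 *       // image is inconsistent (or parts of it could not be checked)
 *   }
 */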
1496
Kevin Wolf8a426612010-07-16 17:17:01 +02001497#define COMMIT_BUF_SECTORS 2048
1498
bellard33e39632003-07-06 17:15:21 +00001499/* commit COW file into the raw image */
1500int bdrv_commit(BlockDriverState *bs)
1501{
bellard19cb3732006-08-19 11:45:59 +00001502 BlockDriver *drv = bs->drv;
Kevin Wolf8a426612010-07-16 17:17:01 +02001503 int64_t sector, total_sectors;
1504 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04001505 int ret = 0;
Kevin Wolf8a426612010-07-16 17:17:01 +02001506 uint8_t *buf;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001507 char filename[1024];
bellard33e39632003-07-06 17:15:21 +00001508
bellard19cb3732006-08-19 11:45:59 +00001509 if (!drv)
1510 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001511
1512 if (!bs->backing_hd) {
1513 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00001514 }
1515
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00001516 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1517 return -EBUSY;
1518 }
1519
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001520 ro = bs->backing_hd->read_only;
1521 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1522 open_flags = bs->backing_hd->open_flags;
1523
1524 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04001525 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
1526 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001527 }
bellard33e39632003-07-06 17:15:21 +00001528 }
bellardea2384d2004-08-01 21:59:26 +00001529
Jan Kiszka6ea44302009-11-30 18:21:19 +01001530 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05001531 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00001532
Kevin Wolf8a426612010-07-16 17:17:01 +02001533 for (sector = 0; sector < total_sectors; sector += n) {
Stefan Hajnoczi05c4af52011-11-14 12:44:18 +00001534 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
Kevin Wolf8a426612010-07-16 17:17:01 +02001535
1536 if (bdrv_read(bs, sector, buf, n) != 0) {
1537 ret = -EIO;
1538 goto ro_cleanup;
1539 }
1540
1541 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1542 ret = -EIO;
1543 goto ro_cleanup;
1544 }
bellardea2384d2004-08-01 21:59:26 +00001545 }
1546 }
bellard95389c82005-12-18 18:28:15 +00001547
Christoph Hellwig1d449522010-01-17 12:32:30 +01001548 if (drv->bdrv_make_empty) {
1549 ret = drv->bdrv_make_empty(bs);
1550 bdrv_flush(bs);
1551 }
bellard95389c82005-12-18 18:28:15 +00001552
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01001553 /*
1554 * Make sure all data we wrote to the backing device is actually
1555 * stable on disk.
1556 */
1557 if (bs->backing_hd)
1558 bdrv_flush(bs->backing_hd);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001559
1560ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05001561 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001562
1563 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04001564 /* ignoring error return here */
1565 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02001566 }
1567
Christoph Hellwig1d449522010-01-17 12:32:30 +01001568 return ret;
bellard33e39632003-07-06 17:15:21 +00001569}
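
/* Usage sketch: a caller written against this API needs nothing more than the
 * return code (device lookup and reporting assumed to happen elsewhere):
 *
 *   int ret = bdrv_commit(bs);
 *   if (ret == -ENOTSUP) {
 *       // nothing to do: bs has no backing file
 *   } else if (ret == -EBUSY) {
 *       // bs or its backing file is in use, e.g. by a block job
 *   } else if (ret < 0) {
 *       // I/O or reopen failure
 *   }
 */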
1570
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001571int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001572{
1573 BlockDriverState *bs;
1574
1575 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001576 int ret = bdrv_commit(bs);
1577 if (ret < 0) {
1578 return ret;
1579 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001580 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00001581 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02001582}
1583
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001584struct BdrvTrackedRequest {
1585 BlockDriverState *bs;
1586 int64_t sector_num;
1587 int nb_sectors;
1588 bool is_write;
1589 QLIST_ENTRY(BdrvTrackedRequest) list;
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001590 Coroutine *co; /* owner, used for deadlock detection */
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001591 CoQueue wait_queue; /* coroutines blocked on this request */
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001592};
1593
1594/**
1595 * Remove an active request from the tracked requests list
1596 *
1597 * This function should be called when a tracked request is completing.
1598 */
1599static void tracked_request_end(BdrvTrackedRequest *req)
1600{
1601 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001602 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001603}
1604
1605/**
1606 * Add an active request to the tracked requests list
1607 */
1608static void tracked_request_begin(BdrvTrackedRequest *req,
1609 BlockDriverState *bs,
1610 int64_t sector_num,
1611 int nb_sectors, bool is_write)
1612{
1613 *req = (BdrvTrackedRequest){
1614 .bs = bs,
1615 .sector_num = sector_num,
1616 .nb_sectors = nb_sectors,
1617 .is_write = is_write,
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001618 .co = qemu_coroutine_self(),
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001619 };
1620
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001621 qemu_co_queue_init(&req->wait_queue);
1622
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00001623 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1624}
1625
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001626/**
1627 * Round a region to cluster boundaries
1628 */
1629static void round_to_clusters(BlockDriverState *bs,
1630 int64_t sector_num, int nb_sectors,
1631 int64_t *cluster_sector_num,
1632 int *cluster_nb_sectors)
1633{
1634 BlockDriverInfo bdi;
1635
1636 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1637 *cluster_sector_num = sector_num;
1638 *cluster_nb_sectors = nb_sectors;
1639 } else {
1640 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1641 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1642 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1643 nb_sectors, c);
1644 }
1645}
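
/* Worked example (cluster size assumed for illustration): with a 64 KB
 * cluster, c = bdi.cluster_size / BDRV_SECTOR_SIZE = 65536 / 512 = 128.
 * A request with sector_num = 130, nb_sectors = 10 is widened to
 * cluster_sector_num = QEMU_ALIGN_DOWN(130, 128) = 128 and
 * cluster_nb_sectors = QEMU_ALIGN_UP(130 - 128 + 10, 128) = 128,
 * i.e. exactly the single cluster that the request touches.
 */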
1646
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001647static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1648 int64_t sector_num, int nb_sectors) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001649 /* aaaa bbbb */
1650 if (sector_num >= req->sector_num + req->nb_sectors) {
1651 return false;
1652 }
1653 /* bbbb aaaa */
1654 if (req->sector_num >= sector_num + nb_sectors) {
1655 return false;
1656 }
1657 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001658}
1659
1660static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
1661 int64_t sector_num, int nb_sectors)
1662{
1663 BdrvTrackedRequest *req;
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001664 int64_t cluster_sector_num;
1665 int cluster_nb_sectors;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001666 bool retry;
1667
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001668 /* If we touch the same cluster it counts as an overlap. This guarantees
1669 * that allocating writes will be serialized and not race with each other
1670 * for the same cluster. For example, in copy-on-read it ensures that the
1671 * CoR read and write operations are atomic and guest writes cannot
1672 * interleave between them.
1673 */
1674 round_to_clusters(bs, sector_num, nb_sectors,
1675 &cluster_sector_num, &cluster_nb_sectors);
1676
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001677 do {
1678 retry = false;
1679 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00001680 if (tracked_request_overlaps(req, cluster_sector_num,
1681 cluster_nb_sectors)) {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00001682 /* Hitting this means there was a reentrant request, for
1683 * example, a block driver issuing nested requests. This must
1684 * never happen since it means deadlock.
1685 */
1686 assert(qemu_coroutine_self() != req->co);
1687
Stefan Hajnoczif4658282011-11-17 13:40:29 +00001688 qemu_co_queue_wait(&req->wait_queue);
1689 retry = true;
1690 break;
1691 }
1692 }
1693 } while (retry);
1694}
1695
Kevin Wolf756e6732010-01-12 12:55:17 +01001696/*
1697 * Return values:
1698 * 0 - success
1699 * -EINVAL - backing format specified, but no file
1700 * -ENOSPC - can't update the backing file because no space is left in the
1701 * image file header
1702 * -ENOTSUP - format driver doesn't support changing the backing file
1703 */
1704int bdrv_change_backing_file(BlockDriverState *bs,
1705 const char *backing_file, const char *backing_fmt)
1706{
1707 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02001708 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01001709
Paolo Bonzini5f377792012-04-12 14:01:01 +02001710 /* Backing file format doesn't make sense without a backing file */
1711 if (backing_fmt && !backing_file) {
1712 return -EINVAL;
1713 }
1714
Kevin Wolf756e6732010-01-12 12:55:17 +01001715 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02001716 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01001717 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02001718 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01001719 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02001720
1721 if (ret == 0) {
1722 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1723 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1724 }
1725 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01001726}
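
/* Usage sketch (file names are made up): rewriting the backing-file reference
 * of an image, as an "unsafe" rebase would do:
 *
 *   ret = bdrv_change_backing_file(bs, "new-base.qcow2", "qcow2");
 *   if (ret == -ENOTSUP) {
 *       // the format driver cannot rewrite its header
 *   } else if (ret == -ENOSPC) {
 *       // the header has no room for the new backing file string
 *   }
 *
 * Passing NULL for both arguments clears the reference (the copies kept in
 * bs->backing_file/backing_format above become empty strings).
 */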
1727
Jeff Cody6ebdcee2012-09-27 13:29:12 -04001728/*
1729 * Finds the image layer in the chain that has 'bs' as its backing file.
1730 *
1731 * active is the current topmost image.
1732 *
1733 * Returns NULL if bs is not found in active's image chain,
1734 * or if active == bs.
1735 */
1736BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
1737 BlockDriverState *bs)
1738{
1739 BlockDriverState *overlay = NULL;
1740 BlockDriverState *intermediate;
1741
1742 assert(active != NULL);
1743 assert(bs != NULL);
1744
1745 /* if bs is the same as active, then by definition it has no overlay
1746 */
1747 if (active == bs) {
1748 return NULL;
1749 }
1750
1751 intermediate = active;
1752 while (intermediate->backing_hd) {
1753 if (intermediate->backing_hd == bs) {
1754 overlay = intermediate;
1755 break;
1756 }
1757 intermediate = intermediate->backing_hd;
1758 }
1759
1760 return overlay;
1761}
1762
1763typedef struct BlkIntermediateStates {
1764 BlockDriverState *bs;
1765 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
1766} BlkIntermediateStates;
1767
1768
1769/*
1770 * Drops images above 'base' up to and including 'top', and sets the image
1771 * above 'top' to have base as its backing file.
1772 *
1773 * Requires that the overlay to 'top' is opened r/w, so that the backing file
1774 * information in 'bs' can be properly updated.
1775 *
1776 * E.g., this will convert the following chain:
1777 * bottom <- base <- intermediate <- top <- active
1778 *
1779 * to
1780 *
1781 * bottom <- base <- active
1782 *
1783 * It is allowed for bottom==base, in which case it converts:
1784 *
1785 * base <- intermediate <- top <- active
1786 *
1787 * to
1788 *
1789 * base <- active
1790 *
1791 * Error conditions:
1792 * if active == top, that is considered an error
1793 *
1794 */
1795int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
1796 BlockDriverState *base)
1797{
1798 BlockDriverState *intermediate;
1799 BlockDriverState *base_bs = NULL;
1800 BlockDriverState *new_top_bs = NULL;
1801 BlkIntermediateStates *intermediate_state, *next;
1802 int ret = -EIO;
1803
1804 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
1805 QSIMPLEQ_INIT(&states_to_delete);
1806
1807 if (!top->drv || !base->drv) {
1808 goto exit;
1809 }
1810
1811 new_top_bs = bdrv_find_overlay(active, top);
1812
1813 if (new_top_bs == NULL) {
1814 /* we could not find the image above 'top', this is an error */
1815 goto exit;
1816 }
1817
1818 /* special case of new_top_bs->backing_hd already pointing to base - nothing
1819 * to do, no intermediate images */
1820 if (new_top_bs->backing_hd == base) {
1821 ret = 0;
1822 goto exit;
1823 }
1824
1825 intermediate = top;
1826
1827 /* now we will go down through the list, and add each BDS we find
1828 * into our deletion queue, until we hit the 'base'
1829 */
1830 while (intermediate) {
1831 intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
1832 intermediate_state->bs = intermediate;
1833 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
1834
1835 if (intermediate->backing_hd == base) {
1836 base_bs = intermediate->backing_hd;
1837 break;
1838 }
1839 intermediate = intermediate->backing_hd;
1840 }
1841 if (base_bs == NULL) {
1842 /* something went wrong, we did not end at the base. Safely
1843 * unravel everything, and exit with error */
1844 goto exit;
1845 }
1846
1847 /* success - we can delete the intermediate states, and link top->base */
1848 ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
1849 base_bs->drv ? base_bs->drv->format_name : "");
1850 if (ret) {
1851 goto exit;
1852 }
1853 new_top_bs->backing_hd = base_bs;
1854
1855
1856 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
1857 /* so that bdrv_close() does not recursively close the chain */
1858 intermediate_state->bs->backing_hd = NULL;
1859 bdrv_delete(intermediate_state->bs);
1860 }
1861 ret = 0;
1862
1863exit:
1864 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
1865 g_free(intermediate_state);
1866 }
1867 return ret;
1868}
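
/* Usage sketch (hypothetical chain): given base <- sn1 <- sn2 <- active, with
 * all four BlockDriverState pointers already known to the caller,
 *
 *   ret = bdrv_drop_intermediate(active, sn2, base);
 *
 * deletes sn2 and sn1 and leaves active backed directly by base, provided the
 * backing file update succeeds.  Passing top == active fails with -EIO, as
 * documented above.
 */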
1869
1870
aliguori71d07702009-03-03 17:37:16 +00001871static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1872 size_t size)
1873{
1874 int64_t len;
1875
1876 if (!bdrv_is_inserted(bs))
1877 return -ENOMEDIUM;
1878
1879 if (bs->growable)
1880 return 0;
1881
1882 len = bdrv_getlength(bs);
1883
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02001884 if (offset < 0)
1885 return -EIO;
1886
1887 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00001888 return -EIO;
1889
1890 return 0;
1891}
1892
1893static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1894 int nb_sectors)
1895{
Jes Sorenseneb5a3162010-05-27 16:20:31 +02001896 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1897 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00001898}
1899
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001900typedef struct RwCo {
1901 BlockDriverState *bs;
1902 int64_t sector_num;
1903 int nb_sectors;
1904 QEMUIOVector *qiov;
1905 bool is_write;
1906 int ret;
1907} RwCo;
1908
1909static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1910{
1911 RwCo *rwco = opaque;
1912
1913 if (!rwco->is_write) {
1914 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00001915 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001916 } else {
1917 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00001918 rwco->nb_sectors, rwco->qiov, 0);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001919 }
1920}
1921
1922/*
1923 * Process a synchronous request using coroutines
1924 */
1925static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1926 int nb_sectors, bool is_write)
1927{
1928 QEMUIOVector qiov;
1929 struct iovec iov = {
1930 .iov_base = (void *)buf,
1931 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1932 };
1933 Coroutine *co;
1934 RwCo rwco = {
1935 .bs = bs,
1936 .sector_num = sector_num,
1937 .nb_sectors = nb_sectors,
1938 .qiov = &qiov,
1939 .is_write = is_write,
1940 .ret = NOT_DONE,
1941 };
1942
1943 qemu_iovec_init_external(&qiov, &iov, 1);
1944
Zhi Yong Wu498e3862012-04-02 18:59:34 +08001945 /**
1946 * In sync call context, when the vcpu is blocked, this throttling timer
1947 * will not fire; so the I/O throttling function has to be disabled here
1948 * if it has been enabled.
1949 */
1950 if (bs->io_limits_enabled) {
1951 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1952 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1953 bdrv_io_limits_disable(bs);
1954 }
1955
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001956 if (qemu_in_coroutine()) {
1957 /* Fast-path if already in coroutine context */
1958 bdrv_rw_co_entry(&rwco);
1959 } else {
1960 co = qemu_coroutine_create(bdrv_rw_co_entry);
1961 qemu_coroutine_enter(co, &rwco);
1962 while (rwco.ret == NOT_DONE) {
1963 qemu_aio_wait();
1964 }
1965 }
1966 return rwco.ret;
1967}
1968
bellard19cb3732006-08-19 11:45:59 +00001969/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00001970int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00001971 uint8_t *buf, int nb_sectors)
1972{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01001973 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
bellardfc01f7e2003-06-30 10:03:06 +00001974}
1975
Markus Armbruster07d27a42012-06-29 17:34:29 +02001976/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
1977int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
1978 uint8_t *buf, int nb_sectors)
1979{
1980 bool enabled;
1981 int ret;
1982
1983 enabled = bs->io_limits_enabled;
1984 bs->io_limits_enabled = false;
1985 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
1986 bs->io_limits_enabled = enabled;
1987 return ret;
1988}
1989
Paolo Bonzini71df14f2012-04-12 14:01:04 +02001990#define BITS_PER_LONG (sizeof(unsigned long) * 8)
1991
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001992static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001993 int nb_sectors, int dirty)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02001994{
1995 int64_t start, end;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001996 unsigned long val, idx, bit;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01001997
Jan Kiszka6ea44302009-11-30 18:21:19 +01001998 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkac6d22832009-11-30 18:21:20 +01001999 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002000
2001 for (; start <= end; start++) {
Paolo Bonzini71df14f2012-04-12 14:01:04 +02002002 idx = start / BITS_PER_LONG;
2003 bit = start % BITS_PER_LONG;
Jan Kiszkac6d22832009-11-30 18:21:20 +01002004 val = bs->dirty_bitmap[idx];
2005 if (dirty) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02002006 if (!(val & (1UL << bit))) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02002007 bs->dirty_count++;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02002008 val |= 1UL << bit;
Liran Schouraaa0eb72010-01-26 10:31:48 +02002009 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01002010 } else {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02002011 if (val & (1UL << bit)) {
Liran Schouraaa0eb72010-01-26 10:31:48 +02002012 bs->dirty_count--;
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02002013 val &= ~(1UL << bit);
Liran Schouraaa0eb72010-01-26 10:31:48 +02002014 }
Jan Kiszkac6d22832009-11-30 18:21:20 +01002015 }
2016 bs->dirty_bitmap[idx] = val;
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002017 }
2018}
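
/* Worked example (constants assumed for illustration: a dirty chunk of
 * BDRV_SECTORS_PER_DIRTY_CHUNK = 2048 sectors, i.e. 1 MB, and
 * BITS_PER_LONG = 64): marking sector_num = 4096, nb_sectors = 1 as dirty
 * gives start = end = 4096 / 2048 = 2, so idx = 2 / 64 = 0 and
 * bit = 2 % 64 = 2; bit 2 of dirty_bitmap[0] is set and dirty_count is
 * incremented once if that bit was previously clear.
 */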
2019
ths5fafdf22007-09-16 21:08:06 +00002020/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002021 -EIO generic I/O error (may happen for all errors)
2022 -ENOMEDIUM No media inserted.
2023 -EINVAL Invalid sector number or nb_sectors
2024 -EACCES Trying to write a read-only device
2025*/
ths5fafdf22007-09-16 21:08:06 +00002026int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002027 const uint8_t *buf, int nb_sectors)
2028{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002029 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
bellard83f64092006-08-01 16:21:11 +00002030}
2031
aliguorieda578e2009-03-12 19:57:16 +00002032int bdrv_pread(BlockDriverState *bs, int64_t offset,
2033 void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00002034{
Jan Kiszka6ea44302009-11-30 18:21:19 +01002035 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00002036 int len, nb_sectors, count;
2037 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002038 int ret;
bellard83f64092006-08-01 16:21:11 +00002039
2040 count = count1;
2041 /* first read to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01002042 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00002043 if (len > count)
2044 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002045 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002046 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002047 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
2048 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002049 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
bellard83f64092006-08-01 16:21:11 +00002050 count -= len;
2051 if (count == 0)
2052 return count1;
2053 sector_num++;
2054 buf += len;
2055 }
2056
2057 /* read the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01002058 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002059 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002060 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
2061 return ret;
bellard83f64092006-08-01 16:21:11 +00002062 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002063 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002064 buf += len;
2065 count -= len;
2066 }
2067
2068 /* add data from the last sector */
2069 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002070 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
2071 return ret;
bellard83f64092006-08-01 16:21:11 +00002072 memcpy(buf, tmp_buf, count);
2073 }
2074 return count1;
2075}
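
/* Worked example of the alignment handling above (offset and size assumed for
 * illustration): bdrv_pread(bs, 1000, buf, 2000) first reads sector 1 and
 * copies len = (512 - 1000) & 511 = 24 bytes (bytes 1000..1023), then reads
 * nb_sectors = 1976 >> 9 = 3 whole sectors (1536 bytes) straight into the
 * caller's buffer, and finally reads one more sector for the remaining 440
 * bytes.  bdrv_pwrite() below follows the same pattern, but must
 * read-modify-write the partial head and tail sectors.
 */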
2076
aliguorieda578e2009-03-12 19:57:16 +00002077int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
2078 const void *buf, int count1)
bellard83f64092006-08-01 16:21:11 +00002079{
Jan Kiszka6ea44302009-11-30 18:21:19 +01002080 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
bellard83f64092006-08-01 16:21:11 +00002081 int len, nb_sectors, count;
2082 int64_t sector_num;
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002083 int ret;
bellard83f64092006-08-01 16:21:11 +00002084
2085 count = count1;
2086 /* first write to align to sector start */
Jan Kiszka6ea44302009-11-30 18:21:19 +01002087 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
bellard83f64092006-08-01 16:21:11 +00002088 if (len > count)
2089 len = count;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002090 sector_num = offset >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002091 if (len > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002092 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
2093 return ret;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002094 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002095 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
2096 return ret;
bellard83f64092006-08-01 16:21:11 +00002097 count -= len;
2098 if (count == 0)
2099 return count1;
2100 sector_num++;
2101 buf += len;
2102 }
2103
2104 /* write the sectors "in place" */
Jan Kiszka6ea44302009-11-30 18:21:19 +01002105 nb_sectors = count >> BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002106 if (nb_sectors > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002107 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
2108 return ret;
bellard83f64092006-08-01 16:21:11 +00002109 sector_num += nb_sectors;
Jan Kiszka6ea44302009-11-30 18:21:19 +01002110 len = nb_sectors << BDRV_SECTOR_BITS;
bellard83f64092006-08-01 16:21:11 +00002111 buf += len;
2112 count -= len;
2113 }
2114
2115 /* add data from the last sector */
2116 if (count > 0) {
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002117 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
2118 return ret;
bellard83f64092006-08-01 16:21:11 +00002119 memcpy(tmp_buf, buf, count);
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002120 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
2121 return ret;
bellard83f64092006-08-01 16:21:11 +00002122 }
2123 return count1;
2124}
bellard83f64092006-08-01 16:21:11 +00002125
Kevin Wolff08145f2010-06-16 16:38:15 +02002126/*
2127 * Writes to the file and ensures that no writes are reordered across this
2128 * request (acts as a barrier)
2129 *
2130 * Returns 0 on success, -errno in error cases.
2131 */
2132int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2133 const void *buf, int count)
2134{
2135 int ret;
2136
2137 ret = bdrv_pwrite(bs, offset, buf, count);
2138 if (ret < 0) {
2139 return ret;
2140 }
2141
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002142 /* No flush needed for cache modes that already do it */
2143 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002144 bdrv_flush(bs);
2145 }
2146
2147 return 0;
2148}
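
/* Usage sketch (offsets and buffer are placeholders): format drivers use this
 * for metadata updates that later writes must not overtake, e.g.
 *
 *   ret = bdrv_pwrite_sync(bs->file, table_offset, table, table_size);
 *   if (ret < 0) {
 *       return ret;     // the update may or may not have reached the disk
 *   }
 *
 * With cache modes that already write through, the explicit flush above is
 * skipped and this degenerates to a plain bdrv_pwrite().
 */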
2149
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002150static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002151 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2152{
2153 /* Perform I/O through a temporary buffer so that users who scribble over
2154 * their read buffer while the operation is in progress do not end up
2155 * modifying the image file. This is critical for zero-copy guest I/O
2156 * where anything might happen inside guest memory.
2157 */
2158 void *bounce_buffer;
2159
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002160 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002161 struct iovec iov;
2162 QEMUIOVector bounce_qiov;
2163 int64_t cluster_sector_num;
2164 int cluster_nb_sectors;
2165 size_t skip_bytes;
2166 int ret;
2167
2168 /* Cover entire cluster so no additional backing file I/O is required when
2169 * allocating cluster in the image file.
2170 */
2171 round_to_clusters(bs, sector_num, nb_sectors,
2172 &cluster_sector_num, &cluster_nb_sectors);
2173
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002174 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2175 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002176
2177 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2178 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2179 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
2180
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002181 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2182 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002183 if (ret < 0) {
2184 goto err;
2185 }
2186
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002187 if (drv->bdrv_co_write_zeroes &&
2188 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01002189 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
2190 cluster_nb_sectors);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002191 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002192 /* This does not change the data on the disk, it is not necessary
2193 * to flush even in cache=writethrough mode.
2194 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002195 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002196 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002197 }
2198
Stefan Hajnocziab185922011-11-17 13:40:31 +00002199 if (ret < 0) {
2200 /* It might be okay to ignore write errors for guest requests. If this
2201 * is a deliberate copy-on-read then we don't want to ignore the error.
2202 * Simply report it in all cases.
2203 */
2204 goto err;
2205 }
2206
2207 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04002208 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
2209 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002210
2211err:
2212 qemu_vfree(bounce_buffer);
2213 return ret;
2214}
2215
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002216/*
2217 * Handle a read request in coroutine context
2218 */
2219static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002220 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
2221 BdrvRequestFlags flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02002222{
2223 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002224 BdrvTrackedRequest req;
2225 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002226
Kevin Wolfda1fa912011-07-14 17:27:13 +02002227 if (!drv) {
2228 return -ENOMEDIUM;
2229 }
2230 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2231 return -EIO;
2232 }
2233
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002234 /* throttling disk read I/O */
2235 if (bs->io_limits_enabled) {
2236 bdrv_io_limits_intercept(bs, false, nb_sectors);
2237 }
2238
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002239 if (bs->copy_on_read) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002240 flags |= BDRV_REQ_COPY_ON_READ;
2241 }
2242 if (flags & BDRV_REQ_COPY_ON_READ) {
2243 bs->copy_on_read_in_flight++;
2244 }
2245
2246 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002247 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
2248 }
2249
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002250 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002251
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002252 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00002253 int pnum;
2254
2255 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
2256 if (ret < 0) {
2257 goto out;
2258 }
2259
2260 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002261 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002262 goto out;
2263 }
2264 }
2265
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002266 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002267
2268out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002269 tracked_request_end(&req);
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002270
2271 if (flags & BDRV_REQ_COPY_ON_READ) {
2272 bs->copy_on_read_in_flight--;
2273 }
2274
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002275 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002276}
2277
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002278int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02002279 int nb_sectors, QEMUIOVector *qiov)
2280{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002281 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02002282
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002283 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
2284}
2285
2286int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
2287 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2288{
2289 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
2290
2291 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
2292 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002293}
2294
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002295static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
2296 int64_t sector_num, int nb_sectors)
2297{
2298 BlockDriver *drv = bs->drv;
2299 QEMUIOVector qiov;
2300 struct iovec iov;
2301 int ret;
2302
Kevin Wolf621f0582012-03-20 15:12:58 +01002303 /* TODO Emulate only part of misaligned requests instead of letting block
2304 * drivers return -ENOTSUP and emulate everything */
2305
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002306 /* First try the efficient write zeroes operation */
2307 if (drv->bdrv_co_write_zeroes) {
Kevin Wolf621f0582012-03-20 15:12:58 +01002308 ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
2309 if (ret != -ENOTSUP) {
2310 return ret;
2311 }
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002312 }
2313
2314 /* Fall back to bounce buffer if write zeroes is unsupported */
2315 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2316 iov.iov_base = qemu_blockalign(bs, iov.iov_len);
2317 memset(iov.iov_base, 0, iov.iov_len);
2318 qemu_iovec_init_external(&qiov, &iov, 1);
2319
2320 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
2321
2322 qemu_vfree(iov.iov_base);
2323 return ret;
2324}
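
/* Sketch of how a driver opts in to the fast path above (hypothetical driver;
 * the field names are the ones this file dereferences):
 *
 *   static BlockDriver bdrv_mydrv = {
 *       .format_name          = "mydrv",
 *       .bdrv_co_readv        = mydrv_co_readv,
 *       .bdrv_co_writev       = mydrv_co_writev,
 *       .bdrv_co_write_zeroes = mydrv_co_write_zeroes,
 *   };
 *
 * Returning -ENOTSUP from the callback (e.g. for misaligned requests, see the
 * TODO above) makes this function fall back to the bounce-buffer path.
 */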
2325
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002326/*
2327 * Handle a write request in coroutine context
2328 */
2329static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002330 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
2331 BdrvRequestFlags flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002332{
2333 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002334 BdrvTrackedRequest req;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01002335 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002336
2337 if (!bs->drv) {
2338 return -ENOMEDIUM;
2339 }
2340 if (bs->read_only) {
2341 return -EACCES;
2342 }
2343 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2344 return -EIO;
2345 }
2346
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08002347 /* throttling disk write I/O */
2348 if (bs->io_limits_enabled) {
2349 bdrv_io_limits_intercept(bs, true, nb_sectors);
2350 }
2351
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002352 if (bs->copy_on_read_in_flight) {
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002353 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
2354 }
2355
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002356 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
2357
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002358 if (flags & BDRV_REQ_ZERO_WRITE) {
2359 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
2360 } else {
2361 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
2362 }
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01002363
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002364 if (ret == 0 && !bs->enable_write_cache) {
2365 ret = bdrv_co_flush(bs);
2366 }
2367
Kevin Wolfda1fa912011-07-14 17:27:13 +02002368 if (bs->dirty_bitmap) {
2369 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2370 }
2371
2372 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2373 bs->wr_highest_sector = sector_num + nb_sectors - 1;
2374 }
2375
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002376 tracked_request_end(&req);
2377
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01002378 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002379}
2380
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002381int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
2382 int nb_sectors, QEMUIOVector *qiov)
2383{
2384 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
2385
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00002386 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
2387}
2388
2389int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
2390 int64_t sector_num, int nb_sectors)
2391{
2392 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
2393
2394 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
2395 BDRV_REQ_ZERO_WRITE);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002396}
2397
bellard83f64092006-08-01 16:21:11 +00002398/**
bellard83f64092006-08-01 16:21:11 +00002399 * Truncate file to 'offset' bytes (needed only for file protocols)
2400 */
2401int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2402{
2403 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002404 int ret;
bellard83f64092006-08-01 16:21:11 +00002405 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002406 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00002407 if (!drv->bdrv_truncate)
2408 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02002409 if (bs->read_only)
2410 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02002411 if (bdrv_in_use(bs))
2412 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002413 ret = drv->bdrv_truncate(bs, offset);
2414 if (ret == 0) {
2415 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02002416 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002417 }
2418 return ret;
bellard83f64092006-08-01 16:21:11 +00002419}
2420
2421/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08002422 * Length of an allocated file in bytes. Sparse files are counted by actual
2423 * allocated space. Return < 0 if error or unknown.
2424 */
2425int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2426{
2427 BlockDriver *drv = bs->drv;
2428 if (!drv) {
2429 return -ENOMEDIUM;
2430 }
2431 if (drv->bdrv_get_allocated_file_size) {
2432 return drv->bdrv_get_allocated_file_size(bs);
2433 }
2434 if (bs->file) {
2435 return bdrv_get_allocated_file_size(bs->file);
2436 }
2437 return -ENOTSUP;
2438}
2439
2440/**
bellard83f64092006-08-01 16:21:11 +00002441 * Length of a file in bytes. Return < 0 if error or unknown.
2442 */
2443int64_t bdrv_getlength(BlockDriverState *bs)
2444{
2445 BlockDriver *drv = bs->drv;
2446 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002447 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01002448
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002449 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01002450 if (drv->bdrv_getlength) {
2451 return drv->bdrv_getlength(bs);
2452 }
bellard83f64092006-08-01 16:21:11 +00002453 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01002454 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00002455}
2456
bellard19cb3732006-08-19 11:45:59 +00002457/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00002458void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00002459{
bellard19cb3732006-08-19 11:45:59 +00002460 int64_t length;
2461 length = bdrv_getlength(bs);
2462 if (length < 0)
2463 length = 0;
2464 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01002465 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00002466 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00002467}
bellardcf989512004-02-16 21:56:36 +00002468
Zhi Yong Wu0563e192011-11-03 16:57:25 +08002469/* throttling disk io limits */
2470void bdrv_set_io_limits(BlockDriverState *bs,
2471 BlockIOLimit *io_limits)
2472{
2473 bs->io_limits = *io_limits;
2474 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2475}
2476
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02002477void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
2478 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002479{
2480 bs->on_read_error = on_read_error;
2481 bs->on_write_error = on_write_error;
2482}
2483
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02002484BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02002485{
2486 return is_read ? bs->on_read_error : bs->on_write_error;
2487}
2488
bellardb3380822004-03-14 21:38:54 +00002489int bdrv_is_read_only(BlockDriverState *bs)
2490{
2491 return bs->read_only;
2492}
2493
ths985a03b2007-12-24 16:10:43 +00002494int bdrv_is_sg(BlockDriverState *bs)
2495{
2496 return bs->sg;
2497}
2498
Christoph Hellwige900a7b2009-09-04 19:01:15 +02002499int bdrv_enable_write_cache(BlockDriverState *bs)
2500{
2501 return bs->enable_write_cache;
2502}
2503
Paolo Bonzini425b0142012-06-06 00:04:52 +02002504void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2505{
2506 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04002507
2508 /* so a reopen() will preserve wce */
2509 if (wce) {
2510 bs->open_flags |= BDRV_O_CACHE_WB;
2511 } else {
2512 bs->open_flags &= ~BDRV_O_CACHE_WB;
2513 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02002514}
2515
bellardea2384d2004-08-01 21:59:26 +00002516int bdrv_is_encrypted(BlockDriverState *bs)
2517{
2518 if (bs->backing_hd && bs->backing_hd->encrypted)
2519 return 1;
2520 return bs->encrypted;
2521}
2522
aliguoric0f4ce72009-03-05 23:01:01 +00002523int bdrv_key_required(BlockDriverState *bs)
2524{
2525 BlockDriverState *backing_hd = bs->backing_hd;
2526
2527 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2528 return 1;
2529 return (bs->encrypted && !bs->valid_key);
2530}
2531
bellardea2384d2004-08-01 21:59:26 +00002532int bdrv_set_key(BlockDriverState *bs, const char *key)
2533{
2534 int ret;
2535 if (bs->backing_hd && bs->backing_hd->encrypted) {
2536 ret = bdrv_set_key(bs->backing_hd, key);
2537 if (ret < 0)
2538 return ret;
2539 if (!bs->encrypted)
2540 return 0;
2541 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02002542 if (!bs->encrypted) {
2543 return -EINVAL;
2544 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2545 return -ENOMEDIUM;
2546 }
aliguoric0f4ce72009-03-05 23:01:01 +00002547 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00002548 if (ret < 0) {
2549 bs->valid_key = 0;
2550 } else if (!bs->valid_key) {
2551 bs->valid_key = 1;
2552 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002553 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00002554 }
aliguoric0f4ce72009-03-05 23:01:01 +00002555 return ret;
bellardea2384d2004-08-01 21:59:26 +00002556}
2557
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02002558const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00002559{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02002560 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00002561}
2562
ths5fafdf22007-09-16 21:08:06 +00002563void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00002564 void *opaque)
2565{
2566 BlockDriver *drv;
2567
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01002568 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00002569 it(opaque, drv->format_name);
2570 }
2571}
2572
bellardb3380822004-03-14 21:38:54 +00002573BlockDriverState *bdrv_find(const char *name)
2574{
2575 BlockDriverState *bs;
2576
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002577 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2578 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00002579 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002580 }
bellardb3380822004-03-14 21:38:54 +00002581 }
2582 return NULL;
2583}
2584
Markus Armbruster2f399b02010-06-02 18:55:20 +02002585BlockDriverState *bdrv_next(BlockDriverState *bs)
2586{
2587 if (!bs) {
2588 return QTAILQ_FIRST(&bdrv_states);
2589 }
2590 return QTAILQ_NEXT(bs, list);
2591}
2592
aliguori51de9762009-03-05 23:00:43 +00002593void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00002594{
2595 BlockDriverState *bs;
2596
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002597 QTAILQ_FOREACH(bs, &bdrv_states, list) {
aliguori51de9762009-03-05 23:00:43 +00002598 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00002599 }
2600}
2601
bellardea2384d2004-08-01 21:59:26 +00002602const char *bdrv_get_device_name(BlockDriverState *bs)
2603{
2604 return bs->device_name;
2605}
2606
Markus Armbrusterc8433282012-06-05 16:49:24 +02002607int bdrv_get_flags(BlockDriverState *bs)
2608{
2609 return bs->open_flags;
2610}
2611
aliguoric6ca28d2008-10-06 13:55:43 +00002612void bdrv_flush_all(void)
2613{
2614 BlockDriverState *bs;
2615
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002616 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01002617 bdrv_flush(bs);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002618 }
aliguoric6ca28d2008-10-06 13:55:43 +00002619}
2620
Kevin Wolff2feebb2010-04-14 17:30:35 +02002621int bdrv_has_zero_init(BlockDriverState *bs)
2622{
2623 assert(bs->drv);
2624
Kevin Wolf336c1c12010-07-28 11:26:29 +02002625 if (bs->drv->bdrv_has_zero_init) {
2626 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02002627 }
2628
2629 return 1;
2630}
2631
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00002632typedef struct BdrvCoIsAllocatedData {
2633 BlockDriverState *bs;
2634 int64_t sector_num;
2635 int nb_sectors;
2636 int *pnum;
2637 int ret;
2638 bool done;
2639} BdrvCoIsAllocatedData;
2640
thsf58c7b32008-06-05 21:53:49 +00002641/*
2642 * Returns true iff the specified sector is present in the disk image. Drivers
2643 * not implementing the functionality are assumed to not support backing files,
2644 * hence all their sectors are reported as allocated.
2645 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002646 * If 'sector_num' is beyond the end of the disk image the return value is 0
2647 * and 'pnum' is set to 0.
2648 *
thsf58c7b32008-06-05 21:53:49 +00002649 * 'pnum' is set to the number of sectors (including and immediately following
2650 * the specified sector) that are known to be in the same
2651 * allocated/unallocated state.
2652 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002653 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2654 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00002655 */
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002656int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2657 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00002658{
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002659 int64_t n;
2660
2661 if (sector_num >= bs->total_sectors) {
2662 *pnum = 0;
2663 return 0;
2664 }
2665
2666 n = bs->total_sectors - sector_num;
2667 if (n < nb_sectors) {
2668 nb_sectors = n;
2669 }
2670
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002671 if (!bs->drv->bdrv_co_is_allocated) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00002672 *pnum = nb_sectors;
thsf58c7b32008-06-05 21:53:49 +00002673 return 1;
2674 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002675
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00002676 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2677}
2678
2679/* Coroutine wrapper for bdrv_is_allocated() */
2680static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2681{
2682 BdrvCoIsAllocatedData *data = opaque;
2683 BlockDriverState *bs = data->bs;
2684
2685 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2686 data->pnum);
2687 data->done = true;
2688}
2689
2690/*
2691 * Synchronous wrapper around bdrv_co_is_allocated().
2692 *
2693 * See bdrv_co_is_allocated() for details.
2694 */
2695int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2696 int *pnum)
2697{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00002698 Coroutine *co;
2699 BdrvCoIsAllocatedData data = {
2700 .bs = bs,
2701 .sector_num = sector_num,
2702 .nb_sectors = nb_sectors,
2703 .pnum = pnum,
2704 .done = false,
2705 };
2706
2707 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2708 qemu_coroutine_enter(co, &data);
2709 while (!data.done) {
2710 qemu_aio_wait();
2711 }
2712 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00002713}
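
/* Usage sketch: walking an image in allocated/unallocated extents via 'pnum',
 * the same way bdrv_commit() above consumes this function:
 *
 *   uint64_t total_sectors;
 *   int64_t sector = 0;
 *   int n;
 *
 *   bdrv_get_geometry(bs, &total_sectors);
 *   while (sector < total_sectors) {
 *       if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
 *           // sectors [sector, sector + n) live in bs itself
 *       } else {
 *           // sectors [sector, sector + n) fall through to the backing file
 *       }
 *       sector += n;
 *   }
 */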
2714
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02002715/*
2716 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
2717 *
2718 * Return true if the given sector is allocated in any image between
2719 * BASE and TOP (inclusive). BASE can be NULL to check if the given
2720 * sector is allocated in any image of the chain. Return false otherwise.
2721 *
2722 * 'pnum' is set to the number of sectors (including and immediately following
2723 * the specified sector) that are known to be in the same
2724 * allocated/unallocated state.
2725 *
2726 */
2727int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
2728 BlockDriverState *base,
2729 int64_t sector_num,
2730 int nb_sectors, int *pnum)
2731{
2732 BlockDriverState *intermediate;
2733 int ret, n = nb_sectors;
2734
2735 intermediate = top;
2736 while (intermediate && intermediate != base) {
2737 int pnum_inter;
2738 ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
2739 &pnum_inter);
2740 if (ret < 0) {
2741 return ret;
2742 } else if (ret) {
2743 *pnum = pnum_inter;
2744 return 1;
2745 }
2746
2747 /*
2748 * [sector_num, nb_sectors] is unallocated on top but intermediate
2749 * might have
2750 *
2751 * [sector_num+x, nb_sectors] allocated.
2752 */
2753 if (n > pnum_inter) {
2754 n = pnum_inter;
2755 }
2756
2757 intermediate = intermediate->backing_hd;
2758 }
2759
2760 *pnum = n;
2761 return 0;
2762}
2763
Luiz Capitulinob2023812011-09-21 17:16:47 -03002764BlockInfoList *qmp_query_block(Error **errp)
bellardb3380822004-03-14 21:38:54 +00002765{
Luiz Capitulinob2023812011-09-21 17:16:47 -03002766 BlockInfoList *head = NULL, *cur_item = NULL;
bellardb3380822004-03-14 21:38:54 +00002767 BlockDriverState *bs;
2768
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002769 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002770 BlockInfoList *info = g_malloc0(sizeof(*info));
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002771
Luiz Capitulinob2023812011-09-21 17:16:47 -03002772 info->value = g_malloc0(sizeof(*info->value));
2773 info->value->device = g_strdup(bs->device_name);
2774 info->value->type = g_strdup("unknown");
2775 info->value->locked = bdrv_dev_is_medium_locked(bs);
2776 info->value->removable = bdrv_dev_has_removable_media(bs);
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002777
Markus Armbrustere4def802011-09-06 18:58:53 +02002778 if (bdrv_dev_has_removable_media(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002779 info->value->has_tray_open = true;
2780 info->value->tray_open = bdrv_dev_is_tray_open(bs);
Markus Armbrustere4def802011-09-06 18:58:53 +02002781 }
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002782
2783 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002784 info->value->has_io_status = true;
2785 info->value->io_status = bs->iostatus;
Luiz Capitulinof04ef602011-09-26 17:43:54 -03002786 }
2787
bellard19cb3732006-08-19 11:45:59 +00002788 if (bs->drv) {
Luiz Capitulinob2023812011-09-21 17:16:47 -03002789 info->value->has_inserted = true;
2790 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2791 info->value->inserted->file = g_strdup(bs->filename);
2792 info->value->inserted->ro = bs->read_only;
2793 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2794 info->value->inserted->encrypted = bs->encrypted;
Luiz Capitulinoc75a1a82012-07-26 20:28:44 -03002795 info->value->inserted->encryption_key_missing = bdrv_key_required(bs);
Luiz Capitulinob2023812011-09-21 17:16:47 -03002796 if (bs->backing_file[0]) {
2797 info->value->inserted->has_backing_file = true;
2798 info->value->inserted->backing_file = g_strdup(bs->backing_file);
aliguori376253e2009-03-05 23:01:23 +00002799 }
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002800
Benoît Canet2e3e3312012-08-02 10:22:48 +02002801 info->value->inserted->backing_file_depth =
2802 bdrv_get_backing_file_depth(bs);
2803
Zhi Yong Wu727f0052011-11-08 13:00:31 +08002804 if (bs->io_limits_enabled) {
2805 info->value->inserted->bps =
2806 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2807 info->value->inserted->bps_rd =
2808 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2809 info->value->inserted->bps_wr =
2810 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2811 info->value->inserted->iops =
2812 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2813 info->value->inserted->iops_rd =
2814 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2815 info->value->inserted->iops_wr =
2816 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2817 }
bellardb3380822004-03-14 21:38:54 +00002818 }
Luiz Capitulinob2023812011-09-21 17:16:47 -03002819
2820 /* XXX: waiting for the qapi to support GSList */
2821 if (!cur_item) {
2822 head = cur_item = info;
2823 } else {
2824 cur_item->next = info;
2825 cur_item = info;
2826 }
bellardb3380822004-03-14 21:38:54 +00002827 }
Luiz Capitulinod15e5462009-12-10 17:16:06 -02002828
Luiz Capitulinob2023812011-09-21 17:16:47 -03002829 return head;
bellardb3380822004-03-14 21:38:54 +00002830}
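
/* Illustrative reply for a single entry built above (device and file names
 * are made up; real output depends on the configuration):
 *
 *   { "device": "virtio0", "type": "unknown", "removable": false,
 *     "locked": false,
 *     "inserted": { "file": "disk.qcow2", "ro": false, "drv": "qcow2",
 *                   "encrypted": false, "encryption_key_missing": false,
 *                   "backing_file_depth": 0 } }
 */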
thsa36e69d2007-12-02 05:18:19 +00002831
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002832/* Consider exposing this as a full-fledged QMP command */
2833static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
thsa36e69d2007-12-02 05:18:19 +00002834{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002835 BlockStats *s;
Luiz Capitulino218a5362009-12-10 17:16:07 -02002836
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002837 s = g_malloc0(sizeof(*s));
Luiz Capitulino218a5362009-12-10 17:16:07 -02002838
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002839 if (bs->device_name[0]) {
2840 s->has_device = true;
2841 s->device = g_strdup(bs->device_name);
Kevin Wolf294cc352010-04-28 14:34:01 +02002842 }
2843
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002844 s->stats = g_malloc0(sizeof(*s->stats));
2845 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2846 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2847 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2848 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2849 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2850 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2851 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2852 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2853 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2854
Kevin Wolf294cc352010-04-28 14:34:01 +02002855 if (bs->file) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002856 s->has_parent = true;
2857 s->parent = qmp_query_blockstat(bs->file, NULL);
Kevin Wolf294cc352010-04-28 14:34:01 +02002858 }
2859
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002860 return s;
Kevin Wolf294cc352010-04-28 14:34:01 +02002861}
2862
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002863BlockStatsList *qmp_query_blockstats(Error **errp)
Luiz Capitulino218a5362009-12-10 17:16:07 -02002864{
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002865 BlockStatsList *head = NULL, *cur_item = NULL;
thsa36e69d2007-12-02 05:18:19 +00002866 BlockDriverState *bs;
2867
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002868 QTAILQ_FOREACH(bs, &bdrv_states, list) {
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002869 BlockStatsList *info = g_malloc0(sizeof(*info));
2870 info->value = qmp_query_blockstat(bs, NULL);
2871
2872 /* XXX: waiting for the qapi to support GSList */
2873 if (!cur_item) {
2874 head = cur_item = info;
2875 } else {
2876 cur_item->next = info;
2877 cur_item = info;
2878 }
thsa36e69d2007-12-02 05:18:19 +00002879 }
Luiz Capitulino218a5362009-12-10 17:16:07 -02002880
Luiz Capitulinof11f57e2011-09-22 15:56:36 -03002881 return head;
thsa36e69d2007-12-02 05:18:19 +00002882}
bellardea2384d2004-08-01 21:59:26 +00002883
aliguori045df332009-03-05 23:00:48 +00002884const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2885{
2886 if (bs->backing_hd && bs->backing_hd->encrypted)
2887 return bs->backing_file;
2888 else if (bs->encrypted)
2889 return bs->filename;
2890 else
2891 return NULL;
2892}
2893
ths5fafdf22007-09-16 21:08:06 +00002894void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00002895 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00002896{
Kevin Wolf3574c602011-10-26 11:02:11 +02002897 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00002898}
2899
ths5fafdf22007-09-16 21:08:06 +00002900int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00002901 const uint8_t *buf, int nb_sectors)
2902{
2903 BlockDriver *drv = bs->drv;
2904 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002905 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002906 if (!drv->bdrv_write_compressed)
2907 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002908 if (bdrv_check_request(bs, sector_num, nb_sectors))
2909 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002910
Jan Kiszkac6d22832009-11-30 18:21:20 +01002911 if (bs->dirty_bitmap) {
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02002912 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2913 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01002914
bellardfaea38e2006-08-05 21:31:00 +00002915 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2916}
ths3b46e622007-09-17 08:09:54 +00002917
bellardfaea38e2006-08-05 21:31:00 +00002918int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2919{
2920 BlockDriver *drv = bs->drv;
2921 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00002922 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00002923 if (!drv->bdrv_get_info)
2924 return -ENOTSUP;
2925 memset(bdi, 0, sizeof(*bdi));
2926 return drv->bdrv_get_info(bs, bdi);
2927}
2928
Christoph Hellwig45566e92009-07-10 23:11:57 +02002929int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2930 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002931{
2932 BlockDriver *drv = bs->drv;
2933 if (!drv)
2934 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002935 if (drv->bdrv_save_vmstate)
2936 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2937 if (bs->file)
2938 return bdrv_save_vmstate(bs->file, buf, pos, size);
2939 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002940}
2941
Christoph Hellwig45566e92009-07-10 23:11:57 +02002942int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2943 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00002944{
2945 BlockDriver *drv = bs->drv;
2946 if (!drv)
2947 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09002948 if (drv->bdrv_load_vmstate)
2949 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2950 if (bs->file)
2951 return bdrv_load_vmstate(bs->file, buf, pos, size);
2952 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00002953}
2954
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002955void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2956{
2957 BlockDriver *drv = bs->drv;
2958
2959 if (!drv || !drv->bdrv_debug_event) {
2960 return;
2961 }
2962
Blue Swirl0ed8b6f2012-07-08 06:56:53 +00002963 drv->bdrv_debug_event(bs, event);
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01002964
2965}
2966
bellardfaea38e2006-08-05 21:31:00 +00002967/**************************************************************/
2968/* handling of snapshots */
2969
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002970int bdrv_can_snapshot(BlockDriverState *bs)
2971{
2972 BlockDriver *drv = bs->drv;
Markus Armbruster07b70bf2011-08-03 15:08:11 +02002973 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Miguel Di Ciurcio Filhofeeee5a2010-06-08 10:40:55 -03002974 return 0;
2975 }
2976
2977 if (!drv->bdrv_snapshot_create) {
2978 if (bs->file != NULL) {
2979 return bdrv_can_snapshot(bs->file);
2980 }
2981 return 0;
2982 }
2983
2984 return 1;
2985}
2986
Blue Swirl199630b2010-07-25 20:49:34 +00002987int bdrv_is_snapshot(BlockDriverState *bs)
2988{
2989 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2990}
2991
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002992BlockDriverState *bdrv_snapshots(void)
2993{
2994 BlockDriverState *bs;
2995
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002996 if (bs_snapshots) {
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002997 return bs_snapshots;
Markus Armbruster3ac906f2010-07-01 09:30:38 +02002998 }
Markus Armbrusterf9092b12010-06-25 10:33:39 +02002999
3000 bs = NULL;
3001 while ((bs = bdrv_next(bs))) {
3002 if (bdrv_can_snapshot(bs)) {
Markus Armbruster3ac906f2010-07-01 09:30:38 +02003003 bs_snapshots = bs;
3004 return bs;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02003005 }
3006 }
3007 return NULL;
Markus Armbrusterf9092b12010-06-25 10:33:39 +02003008}
3009
ths5fafdf22007-09-16 21:08:06 +00003010int bdrv_snapshot_create(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00003011 QEMUSnapshotInfo *sn_info)
3012{
3013 BlockDriver *drv = bs->drv;
3014 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003015 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003016 if (drv->bdrv_snapshot_create)
3017 return drv->bdrv_snapshot_create(bs, sn_info);
3018 if (bs->file)
3019 return bdrv_snapshot_create(bs->file, sn_info);
3020 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00003021}
3022
ths5fafdf22007-09-16 21:08:06 +00003023int bdrv_snapshot_goto(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00003024 const char *snapshot_id)
3025{
3026 BlockDriver *drv = bs->drv;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003027 int ret, open_ret;
3028
bellardfaea38e2006-08-05 21:31:00 +00003029 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003030 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003031 if (drv->bdrv_snapshot_goto)
3032 return drv->bdrv_snapshot_goto(bs, snapshot_id);
3033
3034 if (bs->file) {
3035 drv->bdrv_close(bs);
3036 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
3037 open_ret = drv->bdrv_open(bs, bs->open_flags);
3038 if (open_ret < 0) {
3039 bdrv_delete(bs->file);
3040 bs->drv = NULL;
3041 return open_ret;
3042 }
3043 return ret;
3044 }
3045
3046 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00003047}
3048
3049int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
3050{
3051 BlockDriver *drv = bs->drv;
3052 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003053 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003054 if (drv->bdrv_snapshot_delete)
3055 return drv->bdrv_snapshot_delete(bs, snapshot_id);
3056 if (bs->file)
3057 return bdrv_snapshot_delete(bs->file, snapshot_id);
3058 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00003059}
3060
ths5fafdf22007-09-16 21:08:06 +00003061int bdrv_snapshot_list(BlockDriverState *bs,
bellardfaea38e2006-08-05 21:31:00 +00003062 QEMUSnapshotInfo **psn_info)
3063{
3064 BlockDriver *drv = bs->drv;
3065 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003066 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09003067 if (drv->bdrv_snapshot_list)
3068 return drv->bdrv_snapshot_list(bs, psn_info);
3069 if (bs->file)
3070 return bdrv_snapshot_list(bs->file, psn_info);
3071 return -ENOTSUP;
bellardfaea38e2006-08-05 21:31:00 +00003072}
3073
edison51ef6722010-09-21 19:58:41 -07003074int bdrv_snapshot_load_tmp(BlockDriverState *bs,
3075 const char *snapshot_name)
3076{
3077 BlockDriver *drv = bs->drv;
3078 if (!drv) {
3079 return -ENOMEDIUM;
3080 }
3081 if (!bs->read_only) {
3082 return -EINVAL;
3083 }
3084 if (drv->bdrv_snapshot_load_tmp) {
3085 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
3086 }
3087 return -ENOTSUP;
3088}
3089
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00003090BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3091 const char *backing_file)
3092{
3093 if (!bs->drv) {
3094 return NULL;
3095 }
3096
3097 if (bs->backing_hd) {
3098 if (strcmp(bs->backing_file, backing_file) == 0) {
3099 return bs->backing_hd;
3100 } else {
3101 return bdrv_find_backing_image(bs->backing_hd, backing_file);
3102 }
3103 }
3104
3105 return NULL;
3106}
3107
Benoît Canetf198fd12012-08-02 10:22:47 +02003108int bdrv_get_backing_file_depth(BlockDriverState *bs)
3109{
3110 if (!bs->drv) {
3111 return 0;
3112 }
3113
3114 if (!bs->backing_hd) {
3115 return 0;
3116 }
3117
3118 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
3119}
3120
Jeff Cody79fac562012-09-27 13:29:15 -04003121BlockDriverState *bdrv_find_base(BlockDriverState *bs)
3122{
3123 BlockDriverState *curr_bs = NULL;
3124
3125 if (!bs) {
3126 return NULL;
3127 }
3128
3129 curr_bs = bs;
3130
3131 while (curr_bs->backing_hd) {
3132 curr_bs = curr_bs->backing_hd;
3133 }
3134 return curr_bs;
3135}
3136
bellardfaea38e2006-08-05 21:31:00 +00003137#define NB_SUFFIXES 4
3138
3139char *get_human_readable_size(char *buf, int buf_size, int64_t size)
3140{
3141 static const char suffixes[NB_SUFFIXES] = "KMGT";
3142 int64_t base;
3143 int i;
3144
3145 if (size <= 999) {
3146 snprintf(buf, buf_size, "%" PRId64, size);
3147 } else {
3148 base = 1024;
3149 for(i = 0; i < NB_SUFFIXES; i++) {
3150 if (size < (10 * base)) {
ths5fafdf22007-09-16 21:08:06 +00003151 snprintf(buf, buf_size, "%0.1f%c",
bellardfaea38e2006-08-05 21:31:00 +00003152 (double)size / base,
3153 suffixes[i]);
3154 break;
3155 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
ths5fafdf22007-09-16 21:08:06 +00003156 snprintf(buf, buf_size, "%" PRId64 "%c",
bellardfaea38e2006-08-05 21:31:00 +00003157 ((size + (base >> 1)) / base),
3158 suffixes[i]);
3159 break;
3160 }
3161 base = base * 1024;
3162 }
3163 }
3164 return buf;
3165}
3166
3167char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
3168{
3169 char buf1[128], date_buf[128], clock_buf[128];
bellard3b9f94e2007-01-07 17:27:07 +00003170#ifdef _WIN32
3171 struct tm *ptm;
3172#else
bellardfaea38e2006-08-05 21:31:00 +00003173 struct tm tm;
bellard3b9f94e2007-01-07 17:27:07 +00003174#endif
bellardfaea38e2006-08-05 21:31:00 +00003175 time_t ti;
3176 int64_t secs;
3177
3178 if (!sn) {
ths5fafdf22007-09-16 21:08:06 +00003179 snprintf(buf, buf_size,
3180 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00003181 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
3182 } else {
3183 ti = sn->date_sec;
bellard3b9f94e2007-01-07 17:27:07 +00003184#ifdef _WIN32
3185 ptm = localtime(&ti);
3186 strftime(date_buf, sizeof(date_buf),
3187 "%Y-%m-%d %H:%M:%S", ptm);
3188#else
bellardfaea38e2006-08-05 21:31:00 +00003189 localtime_r(&ti, &tm);
3190 strftime(date_buf, sizeof(date_buf),
3191 "%Y-%m-%d %H:%M:%S", &tm);
bellard3b9f94e2007-01-07 17:27:07 +00003192#endif
bellardfaea38e2006-08-05 21:31:00 +00003193 secs = sn->vm_clock_nsec / 1000000000;
3194 snprintf(clock_buf, sizeof(clock_buf),
3195 "%02d:%02d:%02d.%03d",
3196 (int)(secs / 3600),
3197 (int)((secs / 60) % 60),
ths5fafdf22007-09-16 21:08:06 +00003198 (int)(secs % 60),
bellardfaea38e2006-08-05 21:31:00 +00003199 (int)((sn->vm_clock_nsec / 1000000) % 1000));
3200 snprintf(buf, buf_size,
ths5fafdf22007-09-16 21:08:06 +00003201 "%-10s%-20s%7s%20s%15s",
bellardfaea38e2006-08-05 21:31:00 +00003202 sn->id_str, sn->name,
3203 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
3204 date_buf,
3205 clock_buf);
3206 }
3207 return buf;
3208}
3209
bellard83f64092006-08-01 16:21:11 +00003210/**************************************************************/
3211/* async I/Os */
3212
aliguori3b69e4b2009-01-22 16:59:24 +00003213BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00003214 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00003215 BlockDriverCompletionFunc *cb, void *opaque)
3216{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01003217 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
3218
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003219 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003220 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00003221}
3222
aliguorif141eaf2009-04-07 18:43:24 +00003223BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
3224 QEMUIOVector *qiov, int nb_sectors,
3225 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003226{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01003227 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
3228
Stefan Hajnoczi1a6e1152011-10-13 13:08:25 +01003229 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003230 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00003231}
3232
Kevin Wolf40b4f532009-09-09 17:53:37 +02003233
3234typedef struct MultiwriteCB {
3235 int error;
3236 int num_requests;
3237 int num_callbacks;
3238 struct {
3239 BlockDriverCompletionFunc *cb;
3240 void *opaque;
3241 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003242 } callbacks[];
3243} MultiwriteCB;
3244
3245static void multiwrite_user_cb(MultiwriteCB *mcb)
3246{
3247 int i;
3248
3249 for (i = 0; i < mcb->num_callbacks; i++) {
3250 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01003251 if (mcb->callbacks[i].free_qiov) {
3252 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
3253 }
Anthony Liguori7267c092011-08-20 22:09:37 -05003254 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003255 }
3256}
3257
3258static void multiwrite_cb(void *opaque, int ret)
3259{
3260 MultiwriteCB *mcb = opaque;
3261
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003262 trace_multiwrite_cb(mcb, ret);
3263
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02003264 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02003265 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003266 }
3267
3268 mcb->num_requests--;
3269 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02003270 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05003271 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003272 }
3273}
3274
3275static int multiwrite_req_compare(const void *a, const void *b)
3276{
Christoph Hellwig77be4362010-05-19 20:53:10 +02003277 const BlockRequest *req1 = a, *req2 = b;
3278
3279 /*
3280 * Note that we can't simply subtract req2->sector from req1->sector
3281 * here as that could overflow the return value.
3282 */
3283 if (req1->sector > req2->sector) {
3284 return 1;
3285 } else if (req1->sector < req2->sector) {
3286 return -1;
3287 } else {
3288 return 0;
3289 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02003290}
3291
3292/*
3293 * Takes a bunch of requests and tries to merge them. Returns the number of
3294 * requests that remain after merging.
3295 */
3296static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
3297 int num_reqs, MultiwriteCB *mcb)
3298{
3299 int i, outidx;
3300
3301 // Sort requests by start sector
3302 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
3303
 3304 // Check if adjacent requests touch the same clusters. If so, combine them;
 3305 // only adjacent or overlapping requests are merged, so no gaps need filling.
3306 outidx = 0;
3307 for (i = 1; i < num_reqs; i++) {
3308 int merge = 0;
3309 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
3310
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003311 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02003312 if (reqs[i].sector <= oldreq_last) {
3313 merge = 1;
3314 }
3315
Christoph Hellwige2a305f2010-01-26 14:49:08 +01003316 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3317 merge = 0;
3318 }
3319
Kevin Wolf40b4f532009-09-09 17:53:37 +02003320 if (merge) {
3321 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05003322 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003323 qemu_iovec_init(qiov,
3324 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
3325
3326 // Add the first request to the merged one. If the requests are
3327 // overlapping, drop the last sectors of the first request.
3328 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04003329 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003330
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01003331 // We should not need to add any zeros between the two requests
3332 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003333
3334 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04003335 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003336
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02003337 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003338 reqs[outidx].qiov = qiov;
3339
3340 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3341 } else {
3342 outidx++;
3343 reqs[outidx].sector = reqs[i].sector;
3344 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3345 reqs[outidx].qiov = reqs[i].qiov;
3346 }
3347 }
3348
3349 return outidx + 1;
3350}
3351
3352/*
3353 * Submit multiple AIO write requests at once.
3354 *
3355 * On success, the function returns 0 and all requests in the reqs array have
 3356 * been submitted. In the error case, this function returns -1 and any of the
 3357 * requests may or may not have been submitted. In particular, this means that the
3358 * callback will be called for some of the requests, for others it won't. The
3359 * caller must check the error field of the BlockRequest to wait for the right
3360 * callbacks (if error != 0, no callback will be called).
3361 *
3362 * The implementation may modify the contents of the reqs array, e.g. to merge
3363 * requests. However, the fields opaque and error are left unmodified as they
3364 * are used to signal failure for a single request to the caller.
3365 */
3366int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3367{
Kevin Wolf40b4f532009-09-09 17:53:37 +02003368 MultiwriteCB *mcb;
3369 int i;
3370
Ryan Harper301db7c2011-03-07 10:01:04 -06003371 /* don't submit writes if we don't have a medium */
3372 if (bs->drv == NULL) {
3373 for (i = 0; i < num_reqs; i++) {
3374 reqs[i].error = -ENOMEDIUM;
3375 }
3376 return -1;
3377 }
3378
Kevin Wolf40b4f532009-09-09 17:53:37 +02003379 if (num_reqs == 0) {
3380 return 0;
3381 }
3382
3383 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05003384 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02003385 mcb->num_requests = 0;
3386 mcb->num_callbacks = num_reqs;
3387
3388 for (i = 0; i < num_reqs; i++) {
3389 mcb->callbacks[i].cb = reqs[i].cb;
3390 mcb->callbacks[i].opaque = reqs[i].opaque;
3391 }
3392
 3393 // Check for mergeable requests
3394 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3395
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01003396 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3397
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01003398 /* Run the aio requests. */
3399 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003400 for (i = 0; i < num_reqs; i++) {
Paolo Bonziniad54ae82011-11-30 09:12:30 +01003401 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
Kevin Wolf40b4f532009-09-09 17:53:37 +02003402 reqs[i].nb_sectors, multiwrite_cb, mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02003403 }
3404
3405 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02003406}
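
/*
 * A minimal sketch of how a caller might drive bdrv_aio_multiwrite(); it is
 * not taken from any device model in the tree.  The ExampleWriteState
 * wrapper, the 4K request geometry and the use of qemu_iovec_add() are
 * illustrative assumptions; buf must stay valid until the callbacks run.
 */
typedef struct ExampleWriteState {      /* illustrative per-request state */
    QEMUIOVector qiov;
} ExampleWriteState;

static void example_multiwrite_cb(void *opaque, int ret)
{
    ExampleWriteState *s = opaque;

    /* A real device model would complete its own request object here. */
    if (ret < 0) {
        fprintf(stderr, "multiwrite failed: %s\n", strerror(-ret));
    }
    qemu_iovec_destroy(&s->qiov);
    g_free(s);
}

static int example_submit_two_writes(BlockDriverState *bs, uint8_t *buf)
{
    BlockRequest reqs[2];
    int i;

    for (i = 0; i < 2; i++) {
        ExampleWriteState *s = g_malloc0(sizeof(*s));

        qemu_iovec_init(&s->qiov, 1);
        qemu_iovec_add(&s->qiov, buf + i * 4096, 4096);

        reqs[i].sector     = i * 8;    /* two adjacent 4K writes */
        reqs[i].nb_sectors = 8;
        reqs[i].qiov       = &s->qiov;
        reqs[i].cb         = example_multiwrite_cb;
        reqs[i].opaque     = s;
        reqs[i].error      = 0;
    }

    /* On -1, reqs[i].error tells which requests will get no callback. */
    return bdrv_aio_multiwrite(bs, reqs, 2);
}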
3407
bellard83f64092006-08-01 16:21:11 +00003408void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00003409{
aliguori6bbff9a2009-03-20 18:25:59 +00003410 acb->pool->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00003411}
3412
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08003413/* block I/O throttling */
3414static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3415 bool is_write, double elapsed_time, uint64_t *wait)
3416{
3417 uint64_t bps_limit = 0;
3418 double bytes_limit, bytes_base, bytes_res;
3419 double slice_time, wait_time;
3420
3421 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3422 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3423 } else if (bs->io_limits.bps[is_write]) {
3424 bps_limit = bs->io_limits.bps[is_write];
3425 } else {
3426 if (wait) {
3427 *wait = 0;
3428 }
3429
3430 return false;
3431 }
3432
3433 slice_time = bs->slice_end - bs->slice_start;
3434 slice_time /= (NANOSECONDS_PER_SECOND);
3435 bytes_limit = bps_limit * slice_time;
3436 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3437 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3438 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3439 }
3440
 3441 /* bytes_base: the bytes of data which have already been read/written in
 3442 * the current slice, obtained from the accumulated statistics.
 3443 * bytes_res: the remaining bytes of data which need to be read/written.
 3444 * (bytes_base + bytes_res) / bps_limit: used to calculate
 3445 * the total time for completing the reading/writing of all data.
3446 */
3447 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3448
3449 if (bytes_base + bytes_res <= bytes_limit) {
3450 if (wait) {
3451 *wait = 0;
3452 }
3453
3454 return false;
3455 }
3456
3457 /* Calc approx time to dispatch */
3458 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3459
 3460 /* When the I/O rate at runtime exceeds the limits,
 3461 * bs->slice_end needs to be extended so that the current statistics
 3462 * can be kept until the timer fires; the increase below was tuned
 3463 * experimentally.
3464 */
3465 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3466 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3467 if (wait) {
3468 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3469 }
3470
3471 return true;
3472}
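
/*
 * A worked example with illustrative numbers only: with a bps_limit of
 * 1,000,000 bytes/s and a slice spanning 0.5 s, bytes_limit is 500,000.
 * If 450,000 bytes were already transferred in this slice (bytes_base) and
 * the new request adds 100,000 bytes (bytes_res), the total of 550,000
 * exceeds the limit; the estimated dispatch time is 550,000 / 1,000,000 =
 * 0.55 s, so with an elapsed_time of 0.3 s the computed wait_time is
 * roughly 0.25 s, which is then scaled into clock units via
 * BLOCK_IO_SLICE_TIME before being returned in *wait.
 */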
3473
3474static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3475 double elapsed_time, uint64_t *wait)
3476{
3477 uint64_t iops_limit = 0;
3478 double ios_limit, ios_base;
3479 double slice_time, wait_time;
3480
3481 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3482 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3483 } else if (bs->io_limits.iops[is_write]) {
3484 iops_limit = bs->io_limits.iops[is_write];
3485 } else {
3486 if (wait) {
3487 *wait = 0;
3488 }
3489
3490 return false;
3491 }
3492
3493 slice_time = bs->slice_end - bs->slice_start;
3494 slice_time /= (NANOSECONDS_PER_SECOND);
3495 ios_limit = iops_limit * slice_time;
3496 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3497 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3498 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3499 }
3500
3501 if (ios_base + 1 <= ios_limit) {
3502 if (wait) {
3503 *wait = 0;
3504 }
3505
3506 return false;
3507 }
3508
3509 /* Calc approx time to dispatch */
3510 wait_time = (ios_base + 1) / iops_limit;
3511 if (wait_time > elapsed_time) {
3512 wait_time = wait_time - elapsed_time;
3513 } else {
3514 wait_time = 0;
3515 }
3516
3517 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3518 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3519 if (wait) {
3520 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3521 }
3522
3523 return true;
3524}
3525
3526static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3527 bool is_write, int64_t *wait)
3528{
3529 int64_t now, max_wait;
3530 uint64_t bps_wait = 0, iops_wait = 0;
3531 double elapsed_time;
3532 int bps_ret, iops_ret;
3533
3534 now = qemu_get_clock_ns(vm_clock);
3535 if ((bs->slice_start < now)
3536 && (bs->slice_end > now)) {
3537 bs->slice_end = now + bs->slice_time;
3538 } else {
3539 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3540 bs->slice_start = now;
3541 bs->slice_end = now + bs->slice_time;
3542
3543 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3544 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3545
3546 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3547 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3548 }
3549
3550 elapsed_time = now - bs->slice_start;
3551 elapsed_time /= (NANOSECONDS_PER_SECOND);
3552
3553 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3554 is_write, elapsed_time, &bps_wait);
3555 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3556 elapsed_time, &iops_wait);
3557 if (bps_ret || iops_ret) {
3558 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3559 if (wait) {
3560 *wait = max_wait;
3561 }
3562
3563 now = qemu_get_clock_ns(vm_clock);
3564 if (bs->slice_end < now + max_wait) {
3565 bs->slice_end = now + max_wait;
3566 }
3567
3568 return true;
3569 }
3570
3571 if (wait) {
3572 *wait = 0;
3573 }
3574
3575 return false;
3576}
pbrookce1a14d2006-08-07 02:38:06 +00003577
bellard83f64092006-08-01 16:21:11 +00003578/**************************************************************/
3579/* async block device emulation */
3580
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003581typedef struct BlockDriverAIOCBSync {
3582 BlockDriverAIOCB common;
3583 QEMUBH *bh;
3584 int ret;
3585 /* vector translation state */
3586 QEMUIOVector *qiov;
3587 uint8_t *bounce;
3588 int is_write;
3589} BlockDriverAIOCBSync;
3590
3591static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3592{
Kevin Wolfb666d232010-05-05 11:44:39 +02003593 BlockDriverAIOCBSync *acb =
3594 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03003595 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003596 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003597 qemu_aio_release(acb);
3598}
3599
3600static AIOPool bdrv_em_aio_pool = {
3601 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3602 .cancel = bdrv_aio_cancel_em,
3603};
3604
bellard83f64092006-08-01 16:21:11 +00003605static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00003606{
pbrookce1a14d2006-08-07 02:38:06 +00003607 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00003608
aliguorif141eaf2009-04-07 18:43:24 +00003609 if (!acb->is_write)
Michael Tokarev03396142012-06-07 20:17:55 +04003610 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00003611 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00003612 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03003613 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03003614 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00003615 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00003616}
bellardbeac80c2006-06-26 20:08:57 +00003617
aliguorif141eaf2009-04-07 18:43:24 +00003618static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3619 int64_t sector_num,
3620 QEMUIOVector *qiov,
3621 int nb_sectors,
3622 BlockDriverCompletionFunc *cb,
3623 void *opaque,
3624 int is_write)
3625
bellardea2384d2004-08-01 21:59:26 +00003626{
pbrookce1a14d2006-08-07 02:38:06 +00003627 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00003628
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003629 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00003630 acb->is_write = is_write;
3631 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00003632 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01003633 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00003634
3635 if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04003636 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003637 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003638 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01003639 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00003640 }
3641
pbrookce1a14d2006-08-07 02:38:06 +00003642 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00003643
pbrookce1a14d2006-08-07 02:38:06 +00003644 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00003645}
3646
aliguorif141eaf2009-04-07 18:43:24 +00003647static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3648 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00003649 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00003650{
aliguorif141eaf2009-04-07 18:43:24 +00003651 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00003652}
3653
aliguorif141eaf2009-04-07 18:43:24 +00003654static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3655 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3656 BlockDriverCompletionFunc *cb, void *opaque)
3657{
3658 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
3659}
3660
Kevin Wolf68485422011-06-30 10:05:46 +02003661
3662typedef struct BlockDriverAIOCBCoroutine {
3663 BlockDriverAIOCB common;
3664 BlockRequest req;
3665 bool is_write;
3666 QEMUBH* bh;
3667} BlockDriverAIOCBCoroutine;
3668
3669static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3670{
3671 qemu_aio_flush();
3672}
3673
3674static AIOPool bdrv_em_co_aio_pool = {
3675 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3676 .cancel = bdrv_aio_co_cancel_em,
3677};
3678
Paolo Bonzini35246a62011-10-14 10:41:29 +02003679static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02003680{
3681 BlockDriverAIOCBCoroutine *acb = opaque;
3682
3683 acb->common.cb(acb->common.opaque, acb->req.error);
3684 qemu_bh_delete(acb->bh);
3685 qemu_aio_release(acb);
3686}
3687
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003688/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3689static void coroutine_fn bdrv_co_do_rw(void *opaque)
3690{
3691 BlockDriverAIOCBCoroutine *acb = opaque;
3692 BlockDriverState *bs = acb->common.bs;
3693
3694 if (!acb->is_write) {
3695 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003696 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003697 } else {
3698 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003699 acb->req.nb_sectors, acb->req.qiov, 0);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003700 }
3701
Paolo Bonzini35246a62011-10-14 10:41:29 +02003702 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01003703 qemu_bh_schedule(acb->bh);
3704}
3705
Kevin Wolf68485422011-06-30 10:05:46 +02003706static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3707 int64_t sector_num,
3708 QEMUIOVector *qiov,
3709 int nb_sectors,
3710 BlockDriverCompletionFunc *cb,
3711 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003712 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02003713{
3714 Coroutine *co;
3715 BlockDriverAIOCBCoroutine *acb;
3716
3717 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3718 acb->req.sector = sector_num;
3719 acb->req.nb_sectors = nb_sectors;
3720 acb->req.qiov = qiov;
3721 acb->is_write = is_write;
3722
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01003723 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02003724 qemu_coroutine_enter(co, acb);
3725
3726 return &acb->common;
3727}
3728
Paolo Bonzini07f07612011-10-17 12:32:12 +02003729static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003730{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003731 BlockDriverAIOCBCoroutine *acb = opaque;
3732 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003733
Paolo Bonzini07f07612011-10-17 12:32:12 +02003734 acb->req.error = bdrv_co_flush(bs);
3735 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003736 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02003737}
3738
Paolo Bonzini07f07612011-10-17 12:32:12 +02003739BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02003740 BlockDriverCompletionFunc *cb, void *opaque)
3741{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003742 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003743
Paolo Bonzini07f07612011-10-17 12:32:12 +02003744 Coroutine *co;
3745 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02003746
Paolo Bonzini07f07612011-10-17 12:32:12 +02003747 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3748 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3749 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02003750
Alexander Graf016f5cf2010-05-26 17:51:49 +02003751 return &acb->common;
3752}
3753
Paolo Bonzini4265d622011-10-17 12:32:14 +02003754static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3755{
3756 BlockDriverAIOCBCoroutine *acb = opaque;
3757 BlockDriverState *bs = acb->common.bs;
3758
3759 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3760 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3761 qemu_bh_schedule(acb->bh);
3762}
3763
3764BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3765 int64_t sector_num, int nb_sectors,
3766 BlockDriverCompletionFunc *cb, void *opaque)
3767{
3768 Coroutine *co;
3769 BlockDriverAIOCBCoroutine *acb;
3770
3771 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3772
3773 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3774 acb->req.sector = sector_num;
3775 acb->req.nb_sectors = nb_sectors;
3776 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3777 qemu_coroutine_enter(co, acb);
3778
3779 return &acb->common;
3780}
3781
bellardea2384d2004-08-01 21:59:26 +00003782void bdrv_init(void)
3783{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05003784 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00003785}
pbrookce1a14d2006-08-07 02:38:06 +00003786
Markus Armbrustereb852012009-10-27 18:41:44 +01003787void bdrv_init_with_whitelist(void)
3788{
3789 use_bdrv_whitelist = 1;
3790 bdrv_init();
3791}
3792
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02003793void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3794 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00003795{
pbrookce1a14d2006-08-07 02:38:06 +00003796 BlockDriverAIOCB *acb;
3797
aliguori6bbff9a2009-03-20 18:25:59 +00003798 if (pool->free_aiocb) {
3799 acb = pool->free_aiocb;
3800 pool->free_aiocb = acb->next;
pbrookce1a14d2006-08-07 02:38:06 +00003801 } else {
Anthony Liguori7267c092011-08-20 22:09:37 -05003802 acb = g_malloc0(pool->aiocb_size);
aliguori6bbff9a2009-03-20 18:25:59 +00003803 acb->pool = pool;
pbrookce1a14d2006-08-07 02:38:06 +00003804 }
3805 acb->bs = bs;
3806 acb->cb = cb;
3807 acb->opaque = opaque;
3808 return acb;
3809}
3810
3811void qemu_aio_release(void *p)
3812{
aliguori6bbff9a2009-03-20 18:25:59 +00003813 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3814 AIOPool *pool = acb->pool;
3815 acb->next = pool->free_aiocb;
3816 pool->free_aiocb = acb;
pbrookce1a14d2006-08-07 02:38:06 +00003817}
bellard19cb3732006-08-19 11:45:59 +00003818
3819/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003820/* Coroutine block device emulation */
3821
3822typedef struct CoroutineIOCompletion {
3823 Coroutine *coroutine;
3824 int ret;
3825} CoroutineIOCompletion;
3826
3827static void bdrv_co_io_em_complete(void *opaque, int ret)
3828{
3829 CoroutineIOCompletion *co = opaque;
3830
3831 co->ret = ret;
3832 qemu_coroutine_enter(co->coroutine, NULL);
3833}
3834
3835static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3836 int nb_sectors, QEMUIOVector *iov,
3837 bool is_write)
3838{
3839 CoroutineIOCompletion co = {
3840 .coroutine = qemu_coroutine_self(),
3841 };
3842 BlockDriverAIOCB *acb;
3843
3844 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003845 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3846 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003847 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01003848 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3849 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003850 }
3851
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01003852 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02003853 if (!acb) {
3854 return -EIO;
3855 }
3856 qemu_coroutine_yield();
3857
3858 return co.ret;
3859}
3860
3861static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3862 int64_t sector_num, int nb_sectors,
3863 QEMUIOVector *iov)
3864{
3865 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3866}
3867
3868static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3869 int64_t sector_num, int nb_sectors,
3870 QEMUIOVector *iov)
3871{
3872 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3873}
3874
Paolo Bonzini07f07612011-10-17 12:32:12 +02003875static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003876{
Paolo Bonzini07f07612011-10-17 12:32:12 +02003877 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003878
Paolo Bonzini07f07612011-10-17 12:32:12 +02003879 rwco->ret = bdrv_co_flush(rwco->bs);
3880}
3881
3882int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3883{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003884 int ret;
3885
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003886 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02003887 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003888 }
3889
Kevin Wolfca716362011-11-10 18:13:59 +01003890 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003891 if (bs->drv->bdrv_co_flush_to_os) {
3892 ret = bs->drv->bdrv_co_flush_to_os(bs);
3893 if (ret < 0) {
3894 return ret;
3895 }
3896 }
3897
Kevin Wolfca716362011-11-10 18:13:59 +01003898 /* But don't actually force it to the disk with cache=unsafe */
3899 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02003900 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01003901 }
3902
Kevin Wolfeb489bb2011-11-10 18:10:11 +01003903 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003904 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003905 } else if (bs->drv->bdrv_aio_flush) {
3906 BlockDriverAIOCB *acb;
3907 CoroutineIOCompletion co = {
3908 .coroutine = qemu_coroutine_self(),
3909 };
3910
3911 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3912 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003913 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003914 } else {
3915 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003916 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02003917 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02003918 } else {
3919 /*
3920 * Some block drivers always operate in either writethrough or unsafe
 3921 * mode and therefore don't support bdrv_flush. Usually qemu doesn't
3922 * know how the server works (because the behaviour is hardcoded or
3923 * depends on server-side configuration), so we can't ensure that
3924 * everything is safe on disk. Returning an error doesn't work because
3925 * that would break guests even if the server operates in writethrough
3926 * mode.
3927 *
3928 * Let's hope the user knows what he's doing.
3929 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003930 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003931 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003932 if (ret < 0) {
3933 return ret;
3934 }
3935
3936 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3937 * in the case of cache=unsafe, so there are no useless flushes.
3938 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02003939flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01003940 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02003941}
3942
Anthony Liguori0f154232011-11-14 15:09:45 -06003943void bdrv_invalidate_cache(BlockDriverState *bs)
3944{
3945 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3946 bs->drv->bdrv_invalidate_cache(bs);
3947 }
3948}
3949
3950void bdrv_invalidate_cache_all(void)
3951{
3952 BlockDriverState *bs;
3953
3954 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3955 bdrv_invalidate_cache(bs);
3956 }
3957}
3958
Benoît Canet07789262012-03-23 08:36:49 +01003959void bdrv_clear_incoming_migration_all(void)
3960{
3961 BlockDriverState *bs;
3962
3963 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3964 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
3965 }
3966}
3967
Paolo Bonzini07f07612011-10-17 12:32:12 +02003968int bdrv_flush(BlockDriverState *bs)
3969{
3970 Coroutine *co;
3971 RwCo rwco = {
3972 .bs = bs,
3973 .ret = NOT_DONE,
3974 };
3975
3976 if (qemu_in_coroutine()) {
3977 /* Fast-path if already in coroutine context */
3978 bdrv_flush_co_entry(&rwco);
3979 } else {
3980 co = qemu_coroutine_create(bdrv_flush_co_entry);
3981 qemu_coroutine_enter(co, &rwco);
3982 while (rwco.ret == NOT_DONE) {
3983 qemu_aio_wait();
3984 }
3985 }
3986
3987 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02003988}
3989
Paolo Bonzini4265d622011-10-17 12:32:14 +02003990static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3991{
3992 RwCo *rwco = opaque;
3993
3994 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3995}
3996
3997int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3998 int nb_sectors)
3999{
4000 if (!bs->drv) {
4001 return -ENOMEDIUM;
4002 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
4003 return -EIO;
4004 } else if (bs->read_only) {
4005 return -EROFS;
4006 } else if (bs->drv->bdrv_co_discard) {
4007 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
4008 } else if (bs->drv->bdrv_aio_discard) {
4009 BlockDriverAIOCB *acb;
4010 CoroutineIOCompletion co = {
4011 .coroutine = qemu_coroutine_self(),
4012 };
4013
4014 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
4015 bdrv_co_io_em_complete, &co);
4016 if (acb == NULL) {
4017 return -EIO;
4018 } else {
4019 qemu_coroutine_yield();
4020 return co.ret;
4021 }
Paolo Bonzini4265d622011-10-17 12:32:14 +02004022 } else {
4023 return 0;
4024 }
4025}
4026
4027int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
4028{
4029 Coroutine *co;
4030 RwCo rwco = {
4031 .bs = bs,
4032 .sector_num = sector_num,
4033 .nb_sectors = nb_sectors,
4034 .ret = NOT_DONE,
4035 };
4036
4037 if (qemu_in_coroutine()) {
4038 /* Fast-path if already in coroutine context */
4039 bdrv_discard_co_entry(&rwco);
4040 } else {
4041 co = qemu_coroutine_create(bdrv_discard_co_entry);
4042 qemu_coroutine_enter(co, &rwco);
4043 while (rwco.ret == NOT_DONE) {
4044 qemu_aio_wait();
4045 }
4046 }
4047
4048 return rwco.ret;
4049}
4050
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004051/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00004052/* removable device support */
4053
4054/**
4055 * Return TRUE if the media is present
4056 */
4057int bdrv_is_inserted(BlockDriverState *bs)
4058{
4059 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004060
bellard19cb3732006-08-19 11:45:59 +00004061 if (!drv)
4062 return 0;
4063 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004064 return 1;
4065 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00004066}
4067
4068/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004069 * Return whether the media changed since the last call to this
4070 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00004071 */
4072int bdrv_media_changed(BlockDriverState *bs)
4073{
4074 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004075
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004076 if (drv && drv->bdrv_media_changed) {
4077 return drv->bdrv_media_changed(bs);
4078 }
4079 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00004080}
4081
4082/**
4083 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
4084 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02004085void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00004086{
4087 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004088
Markus Armbruster822e1cd2011-07-20 18:23:42 +02004089 if (drv && drv->bdrv_eject) {
4090 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00004091 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02004092
4093 if (bs->device_name[0] != '\0') {
4094 bdrv_emit_qmp_eject_event(bs, eject_flag);
4095 }
bellard19cb3732006-08-19 11:45:59 +00004096}
4097
bellard19cb3732006-08-19 11:45:59 +00004098/**
4099 * Lock or unlock the media (if it is locked, the user won't be able
4100 * to eject it manually).
4101 */
Markus Armbruster025e8492011-09-06 18:58:47 +02004102void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00004103{
4104 BlockDriver *drv = bs->drv;
4105
Markus Armbruster025e8492011-09-06 18:58:47 +02004106 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01004107
Markus Armbruster025e8492011-09-06 18:58:47 +02004108 if (drv && drv->bdrv_lock_medium) {
4109 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00004110 }
4111}
ths985a03b2007-12-24 16:10:43 +00004112
4113/* needed for generic scsi interface */
4114
4115int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
4116{
4117 BlockDriver *drv = bs->drv;
4118
4119 if (drv && drv->bdrv_ioctl)
4120 return drv->bdrv_ioctl(bs, req, buf);
4121 return -ENOTSUP;
4122}
aliguori7d780662009-03-12 19:57:08 +00004123
aliguori221f7152009-03-28 17:28:41 +00004124BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
4125 unsigned long int req, void *buf,
4126 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00004127{
aliguori221f7152009-03-28 17:28:41 +00004128 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00004129
aliguori221f7152009-03-28 17:28:41 +00004130 if (drv && drv->bdrv_aio_ioctl)
4131 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
4132 return NULL;
aliguori7d780662009-03-12 19:57:08 +00004133}
aliguorie268ca52009-04-22 20:20:00 +00004134
Markus Armbruster7b6f9302011-09-06 18:58:56 +02004135void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
4136{
4137 bs->buffer_alignment = align;
4138}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004139
aliguorie268ca52009-04-22 20:20:00 +00004140void *qemu_blockalign(BlockDriverState *bs, size_t size)
4141{
4142 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
4143}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004144
4145void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
4146{
4147 int64_t bitmap_size;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004148
Liran Schouraaa0eb72010-01-26 10:31:48 +02004149 bs->dirty_count = 0;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004150 if (enable) {
Jan Kiszkac6d22832009-11-30 18:21:20 +01004151 if (!bs->dirty_bitmap) {
4152 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
Paolo Bonzini71df14f2012-04-12 14:01:04 +02004153 BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
4154 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004155
Paolo Bonzini71df14f2012-04-12 14:01:04 +02004156 bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004157 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004158 } else {
Jan Kiszkac6d22832009-11-30 18:21:20 +01004159 if (bs->dirty_bitmap) {
Anthony Liguori7267c092011-08-20 22:09:37 -05004160 g_free(bs->dirty_bitmap);
Jan Kiszkac6d22832009-11-30 18:21:20 +01004161 bs->dirty_bitmap = NULL;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004162 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004163 }
4164}
4165
4166int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
4167{
Jan Kiszka6ea44302009-11-30 18:21:19 +01004168 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004169
Jan Kiszkac6d22832009-11-30 18:21:20 +01004170 if (bs->dirty_bitmap &&
4171 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
Marcelo Tosatti6d59fec2010-11-08 17:02:54 -02004172 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
4173 (1UL << (chunk % (sizeof(unsigned long) * 8))));
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004174 } else {
4175 return 0;
4176 }
4177}
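
/*
 * Index arithmetic with assumed example values (neither constant is taken
 * from this listing): if BDRV_SECTORS_PER_DIRTY_CHUNK were 2048 (1 MB
 * chunks) and unsigned long were 64 bits wide, sector 1000000 would map to
 * chunk 1000000 / 2048 = 488, stored in word 488 / 64 = 7 of
 * bs->dirty_bitmap at bit position 488 % 64 = 40.
 */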
4178
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004179void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
4180 int nr_sectors)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02004181{
4182 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
4183}
Liran Schouraaa0eb72010-01-26 10:31:48 +02004184
4185int64_t bdrv_get_dirty_count(BlockDriverState *bs)
4186{
4187 return bs->dirty_count;
4188}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004189
Marcelo Tosattidb593f22011-01-26 12:12:34 -02004190void bdrv_set_in_use(BlockDriverState *bs, int in_use)
4191{
4192 assert(bs->in_use != in_use);
4193 bs->in_use = in_use;
4194}
4195
4196int bdrv_in_use(BlockDriverState *bs)
4197{
4198 return bs->in_use;
4199}
4200
Luiz Capitulino28a72822011-09-26 17:43:50 -03004201void bdrv_iostatus_enable(BlockDriverState *bs)
4202{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03004203 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03004204 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03004205}
4206
4207/* The I/O status is only enabled if the drive explicitly
4208 * enables it _and_ the VM is configured to stop on errors */
4209bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
4210{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03004211 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02004212 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
4213 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
4214 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03004215}
4216
4217void bdrv_iostatus_disable(BlockDriverState *bs)
4218{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03004219 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03004220}
4221
4222void bdrv_iostatus_reset(BlockDriverState *bs)
4223{
4224 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03004225 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03004226 }
4227}
4228
4229/* XXX: Today this is set by device models because it makes the implementation
4230 quite simple. However, the block layer knows about the error, so it's
4231 possible to implement this without device models being involved */
4232void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
4233{
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03004234 if (bdrv_iostatus_is_enabled(bs) &&
4235 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino28a72822011-09-26 17:43:50 -03004236 assert(error >= 0);
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03004237 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
4238 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03004239 }
4240}
4241
Christoph Hellwiga597e792011-08-25 08:26:01 +02004242void
4243bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
4244 enum BlockAcctType type)
4245{
4246 assert(type < BDRV_MAX_IOTYPE);
4247
4248 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02004249 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02004250 cookie->type = type;
4251}
4252
4253void
4254bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
4255{
4256 assert(cookie->type < BDRV_MAX_IOTYPE);
4257
4258 bs->nr_bytes[cookie->type] += cookie->bytes;
4259 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02004260 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02004261}
4262
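/*
 * A minimal sketch of wrapping a synchronous read in the accounting hooks
 * above; the helper name and request size are illustrative assumptions.
 * Real device models embed the BlockAcctCookie in their request state and
 * call bdrv_acct_done() from the completion callback for asynchronous I/O.
 */
static int example_accounted_read(BlockDriverState *bs, int64_t sector_num,
                                  uint8_t *buf, int nb_sectors)
{
    BlockAcctCookie cookie;
    int ret;

    bdrv_acct_start(bs, &cookie, (int64_t)nb_sectors * BDRV_SECTOR_SIZE,
                    BDRV_ACCT_READ);
    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
    bdrv_acct_done(bs, &cookie);
    return ret;
}
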
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004263int bdrv_img_create(const char *filename, const char *fmt,
4264 const char *base_filename, const char *base_fmt,
4265 char *options, uint64_t img_size, int flags)
4266{
4267 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02004268 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004269 BlockDriverState *bs = NULL;
4270 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004271 BlockDriver *backing_drv = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004272 int ret = 0;
4273
4274 /* Find driver and parse its options */
4275 drv = bdrv_find_format(fmt);
4276 if (!drv) {
4277 error_report("Unknown file format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004278 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004279 goto out;
4280 }
4281
4282 proto_drv = bdrv_find_protocol(filename);
4283 if (!proto_drv) {
4284 error_report("Unknown protocol '%s'", filename);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004285 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004286 goto out;
4287 }
4288
4289 create_options = append_option_parameters(create_options,
4290 drv->create_options);
4291 create_options = append_option_parameters(create_options,
4292 proto_drv->create_options);
4293
4294 /* Create parameter list with default values */
4295 param = parse_option_parameters("", create_options, param);
4296
4297 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4298
4299 /* Parse -o options */
4300 if (options) {
4301 param = parse_option_parameters(options, create_options, param);
4302 if (param == NULL) {
4303 error_report("Invalid options for file format '%s'.", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004304 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004305 goto out;
4306 }
4307 }
4308
4309 if (base_filename) {
4310 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4311 base_filename)) {
4312 error_report("Backing file not supported for file format '%s'",
4313 fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004314 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004315 goto out;
4316 }
4317 }
4318
4319 if (base_fmt) {
4320 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4321 error_report("Backing file format not supported for file "
4322 "format '%s'", fmt);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004323 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004324 goto out;
4325 }
4326 }
4327
Jes Sorensen792da932010-12-16 13:52:17 +01004328 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4329 if (backing_file && backing_file->value.s) {
4330 if (!strcmp(filename, backing_file->value.s)) {
4331 error_report("Error: Trying to create an image with the "
4332 "same filename as the backing file");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004333 ret = -EINVAL;
Jes Sorensen792da932010-12-16 13:52:17 +01004334 goto out;
4335 }
4336 }
4337
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004338 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4339 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004340 backing_drv = bdrv_find_format(backing_fmt->value.s);
4341 if (!backing_drv) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004342 error_report("Unknown backing file format '%s'",
4343 backing_fmt->value.s);
Jes Sorensen4f70f242010-12-16 13:52:18 +01004344 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004345 goto out;
4346 }
4347 }
4348
4349 // The size for the image must always be specified, with one exception:
4350 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02004351 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4352 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004353 if (backing_file && backing_file->value.s) {
4354 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004355 char buf[32];
Paolo Bonzini63090da2012-04-12 14:01:03 +02004356 int back_flags;
4357
4358 /* backing files always opened read-only */
4359 back_flags =
4360 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004361
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004362 bs = bdrv_new("");
4363
Paolo Bonzini63090da2012-04-12 14:01:03 +02004364 ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004365 if (ret < 0) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00004366 error_report("Could not open '%s'", backing_file->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004367 goto out;
4368 }
4369 bdrv_get_geometry(bs, &size);
4370 size *= 512;
4371
4372 snprintf(buf, sizeof(buf), "%" PRId64, size);
4373 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4374 } else {
4375 error_report("Image creation needs a size parameter");
Jes Sorensen4f70f242010-12-16 13:52:18 +01004376 ret = -EINVAL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004377 goto out;
4378 }
4379 }
4380
4381 printf("Formatting '%s', fmt=%s ", filename, fmt);
4382 print_option_parameters(param);
4383 puts("");
4384
4385 ret = bdrv_create(drv, filename, param);
4386
4387 if (ret < 0) {
4388 if (ret == -ENOTSUP) {
4389 error_report("Formatting or formatting option not supported for "
4390 "file format '%s'", fmt);
4391 } else if (ret == -EFBIG) {
4392 error_report("The image size is too large for file format '%s'",
4393 fmt);
4394 } else {
4395 error_report("%s: error while creating %s: %s", filename, fmt,
4396 strerror(-ret));
4397 }
4398 }
4399
4400out:
4401 free_option_parameters(create_options);
4402 free_option_parameters(param);
4403
4404 if (bs) {
4405 bdrv_delete(bs);
4406 }
Jes Sorensen4f70f242010-12-16 13:52:18 +01004407
4408 return ret;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01004409}
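
/*
 * A minimal sketch of a bdrv_img_create() call; the file name, the use of
 * the "raw" format and the 1 GiB size are illustrative assumptions, and the
 * return code is the only error handling shown.
 */
static int example_create_image(void)
{
    /* filename, fmt, base_filename, base_fmt, options, img_size, flags */
    return bdrv_img_create("test.img", "raw", NULL, NULL, NULL,
                           (uint64_t)1024 * 1024 * 1024, 0);
}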