blob: 76fcc1d6d36a754448d36e9f24c81cb17072d12b [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010027#include "block/block_int.h"
28#include "block/blockjob.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010029#include "qemu/module.h"
Paolo Bonzini7b1b5d12012-12-17 18:19:43 +010030#include "qapi/qmp/qjson.h"
Markus Armbrusterbfb197e2014-10-07 13:59:11 +020031#include "sysemu/block-backend.h"
Paolo Bonzini9c17d612012-12-17 18:20:04 +010032#include "sysemu/sysemu.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010033#include "qemu/notify.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010034#include "block/coroutine.h"
Benoît Canetc13163f2014-01-23 21:31:34 +010035#include "block/qapi.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030036#include "qmp-commands.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010037#include "qemu/timer.h"
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +020038#include "qapi-event.h"
bellardfc01f7e2003-06-30 10:03:06 +000039
Juan Quintela71e72a12009-07-27 16:12:56 +020040#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000044#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000045#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000046#include <sys/disk.h>
47#endif
blueswir1c5e97232009-03-07 20:06:23 +000048#endif
bellard7674e7b2005-04-26 21:59:26 +000049
aliguori49dc7682009-03-08 16:26:59 +000050#ifdef _WIN32
51#include <windows.h>
52#endif
53
Fam Zhenge4654d22013-11-13 18:29:43 +080054struct BdrvDirtyBitmap {
55 HBitmap *bitmap;
56 QLIST_ENTRY(BdrvDirtyBitmap) list;
57};
58
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010059#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
60
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020061static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000062 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020063 BlockCompletionFunc *cb, void *opaque);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020064static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000065 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020066 BlockCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020067static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors,
69 QEMUIOVector *iov);
70static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010073static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
74 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000075 BdrvRequestFlags flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010076static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000078 BdrvRequestFlags flags);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020079static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
80 int64_t sector_num,
81 QEMUIOVector *qiov,
82 int nb_sectors,
83 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +020084 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020085 void *opaque,
86 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010087static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +010088static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +020089 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellardec530c82006-04-25 22:36:06 +000090
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010091static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000093
Benoît Canetdc364f42014-01-23 21:31:32 +010094static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
96
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010097static QLIST_HEAD(, BlockDriver) bdrv_drivers =
98 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000099
Markus Armbrustereb852012009-10-27 18:41:44 +0100100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
#ifdef _WIN32
/* Return non-zero if @filename begins with a DOS drive letter ("c:"). */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

/* Return non-zero if @filename names a whole drive ("c:") or a Windows
 * device path ("\\.\..." or "//./..."). */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
122
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800123/* throttling disk I/O limits */
Benoît Canetcc0681c2013-09-02 14:14:39 +0200124void bdrv_set_io_limits(BlockDriverState *bs,
125 ThrottleConfig *cfg)
126{
127 int i;
128
129 throttle_config(&bs->throttle_state, cfg);
130
131 for (i = 0; i < 2; i++) {
132 qemu_co_enter_next(&bs->throttled_reqs[i]);
133 }
134}
135
136/* this function drain all the throttled IOs */
137static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
138{
139 bool drained = false;
140 bool enabled = bs->io_limits_enabled;
141 int i;
142
143 bs->io_limits_enabled = false;
144
145 for (i = 0; i < 2; i++) {
146 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
147 drained = true;
148 }
149 }
150
151 bs->io_limits_enabled = enabled;
152
153 return drained;
154}
155
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800156void bdrv_io_limits_disable(BlockDriverState *bs)
157{
158 bs->io_limits_enabled = false;
159
Benoît Canetcc0681c2013-09-02 14:14:39 +0200160 bdrv_start_throttled_reqs(bs);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800161
Benoît Canetcc0681c2013-09-02 14:14:39 +0200162 throttle_destroy(&bs->throttle_state);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800163}
164
Benoît Canetcc0681c2013-09-02 14:14:39 +0200165static void bdrv_throttle_read_timer_cb(void *opaque)
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800166{
167 BlockDriverState *bs = opaque;
Benoît Canetcc0681c2013-09-02 14:14:39 +0200168 qemu_co_enter_next(&bs->throttled_reqs[0]);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800169}
170
Benoît Canetcc0681c2013-09-02 14:14:39 +0200171static void bdrv_throttle_write_timer_cb(void *opaque)
172{
173 BlockDriverState *bs = opaque;
174 qemu_co_enter_next(&bs->throttled_reqs[1]);
175}
176
177/* should be called before bdrv_set_io_limits if a limit is set */
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800178void bdrv_io_limits_enable(BlockDriverState *bs)
179{
Benoît Canetcc0681c2013-09-02 14:14:39 +0200180 assert(!bs->io_limits_enabled);
181 throttle_init(&bs->throttle_state,
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +0200182 bdrv_get_aio_context(bs),
Benoît Canetcc0681c2013-09-02 14:14:39 +0200183 QEMU_CLOCK_VIRTUAL,
184 bdrv_throttle_read_timer_cb,
185 bdrv_throttle_write_timer_cb,
186 bs);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800187 bs->io_limits_enabled = true;
188}
189
Benoît Canetcc0681c2013-09-02 14:14:39 +0200190/* This function makes an IO wait if needed
191 *
192 * @nb_sectors: the number of sectors of the IO
193 * @is_write: is the IO a write
194 */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800195static void bdrv_io_limits_intercept(BlockDriverState *bs,
Kevin Wolfd5103582014-01-16 13:29:10 +0100196 unsigned int bytes,
Benoît Canetcc0681c2013-09-02 14:14:39 +0200197 bool is_write)
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800198{
Benoît Canetcc0681c2013-09-02 14:14:39 +0200199 /* does this io must wait */
200 bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800201
Benoît Canetcc0681c2013-09-02 14:14:39 +0200202 /* if must wait or any request of this type throttled queue the IO */
203 if (must_wait ||
204 !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
205 qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800206 }
207
Benoît Canetcc0681c2013-09-02 14:14:39 +0200208 /* the IO will be executed, do the accounting */
Kevin Wolfd5103582014-01-16 13:29:10 +0100209 throttle_account(&bs->throttle_state, is_write, bytes);
210
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800211
Benoît Canetcc0681c2013-09-02 14:14:39 +0200212 /* if the next request must wait -> do nothing */
213 if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
214 return;
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800215 }
216
Benoît Canetcc0681c2013-09-02 14:14:39 +0200217 /* else queue next request for execution */
218 qemu_co_queue_next(&bs->throttled_reqs[is_write]);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800219}
220
Kevin Wolf339064d2013-11-28 10:23:32 +0100221size_t bdrv_opt_mem_align(BlockDriverState *bs)
222{
223 if (!bs || !bs->drv) {
224 /* 4k should be on the safe side */
225 return 4096;
226 }
227
228 return bs->bl.opt_mem_alignment;
229}
230
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *sep;

#ifdef _WIN32
    /* A drive letter or device path is not a protocol prefix. */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    sep = path + strcspn(path, ":/\\");
#else
    sep = path + strcspn(path, ":/");
#endif

    /* Protocol only if a ':' appears before any path separator. */
    return *sep == ':';
}
248
/* Return non-zero if @path is absolute (platform-specific rules). */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}
261
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *last_sep;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Skip a leading "protocol:" prefix, if any. */
    prefix_end = strchr(base_path, ':');
    if (prefix_end) {
        prefix_end++;
    } else {
        prefix_end = base_path;
    }

    /* Find the character just after the last path separator. */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!last_sep || bslash > last_sep) {
            last_sep = bslash;
        }
    }
#endif
    if (last_sep) {
        last_sep++;
    } else {
        last_sep = base_path;
    }
    if (last_sep > prefix_end) {
        prefix_end = last_sep;
    }

    /* Copy the directory part of base_path, then append filename. */
    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
305
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200306void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
307{
308 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
309 pstrcpy(dest, sz, bs->backing_file);
310 } else {
311 path_combine(dest, sz, bs->filename, bs->backing_file);
312 }
313}
314
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500315void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000316{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100317 /* Block drivers without coroutine functions need emulation */
318 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200319 bdrv->bdrv_co_readv = bdrv_co_readv_em;
320 bdrv->bdrv_co_writev = bdrv_co_writev_em;
321
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100322 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
323 * the block driver lacks aio we need to emulate that too.
324 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200325 if (!bdrv->bdrv_aio_readv) {
326 /* add AIO emulation layer */
327 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
328 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200329 }
bellard83f64092006-08-01 16:21:11 +0000330 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200331
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100332 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000333}
bellardb3380822004-03-14 21:38:54 +0000334
Markus Armbruster7f06d472014-10-07 13:59:12 +0200335BlockDriverState *bdrv_new_root(void)
bellardfc01f7e2003-06-30 10:03:06 +0000336{
Markus Armbruster7f06d472014-10-07 13:59:12 +0200337 BlockDriverState *bs = bdrv_new();
Markus Armbrustere4e99862014-10-07 13:59:03 +0200338
Markus Armbrustere4e99862014-10-07 13:59:03 +0200339 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
Markus Armbrustere4e99862014-10-07 13:59:03 +0200340 return bs;
341}
342
343BlockDriverState *bdrv_new(void)
344{
345 BlockDriverState *bs;
346 int i;
347
Markus Armbruster5839e532014-08-19 10:31:08 +0200348 bs = g_new0(BlockDriverState, 1);
Fam Zhenge4654d22013-11-13 18:29:43 +0800349 QLIST_INIT(&bs->dirty_bitmaps);
Fam Zhengfbe40ff2014-05-23 21:29:42 +0800350 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
351 QLIST_INIT(&bs->op_blockers[i]);
352 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300353 bdrv_iostatus_disable(bs);
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200354 notifier_list_init(&bs->close_notifiers);
Stefan Hajnoczid616b222013-06-24 17:13:10 +0200355 notifier_with_return_list_init(&bs->before_write_notifiers);
Benoît Canetcc0681c2013-09-02 14:14:39 +0200356 qemu_co_queue_init(&bs->throttled_reqs[0]);
357 qemu_co_queue_init(&bs->throttled_reqs[1]);
Fam Zheng9fcb0252013-08-23 09:14:46 +0800358 bs->refcnt = 1;
Stefan Hajnoczidcd04222014-05-08 16:34:37 +0200359 bs->aio_context = qemu_get_aio_context();
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200360
bellardb3380822004-03-14 21:38:54 +0000361 return bs;
362}
363
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200364void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
365{
366 notifier_list_add(&bs->close_notifiers, notify);
367}
368
bellardea2384d2004-08-01 21:59:26 +0000369BlockDriver *bdrv_find_format(const char *format_name)
370{
371 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100372 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
373 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000374 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100375 }
bellardea2384d2004-08-01 21:59:26 +0000376 }
377 return NULL;
378}
379
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800380static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100381{
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800382 static const char *whitelist_rw[] = {
383 CONFIG_BDRV_RW_WHITELIST
384 };
385 static const char *whitelist_ro[] = {
386 CONFIG_BDRV_RO_WHITELIST
Markus Armbrustereb852012009-10-27 18:41:44 +0100387 };
388 const char **p;
389
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800390 if (!whitelist_rw[0] && !whitelist_ro[0]) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100391 return 1; /* no whitelist, anything goes */
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800392 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100393
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800394 for (p = whitelist_rw; *p; p++) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100395 if (!strcmp(drv->format_name, *p)) {
396 return 1;
397 }
398 }
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800399 if (read_only) {
400 for (p = whitelist_ro; *p; p++) {
401 if (!strcmp(drv->format_name, *p)) {
402 return 1;
403 }
404 }
405 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100406 return 0;
407}
408
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800409BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
410 bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100411{
412 BlockDriver *drv = bdrv_find_format(format_name);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800413 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
Markus Armbrustereb852012009-10-27 18:41:44 +0100414}
415
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800416typedef struct CreateCo {
417 BlockDriver *drv;
418 char *filename;
Chunyan Liu83d05212014-06-05 17:20:51 +0800419 QemuOpts *opts;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800420 int ret;
Max Reitzcc84d902013-09-06 17:14:26 +0200421 Error *err;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800422} CreateCo;
423
424static void coroutine_fn bdrv_create_co_entry(void *opaque)
425{
Max Reitzcc84d902013-09-06 17:14:26 +0200426 Error *local_err = NULL;
427 int ret;
428
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800429 CreateCo *cco = opaque;
430 assert(cco->drv);
431
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800432 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100433 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200434 error_propagate(&cco->err, local_err);
435 }
436 cco->ret = ret;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800437}
438
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200439int bdrv_create(BlockDriver *drv, const char* filename,
Chunyan Liu83d05212014-06-05 17:20:51 +0800440 QemuOpts *opts, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000441{
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800442 int ret;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200443
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800444 Coroutine *co;
445 CreateCo cco = {
446 .drv = drv,
447 .filename = g_strdup(filename),
Chunyan Liu83d05212014-06-05 17:20:51 +0800448 .opts = opts,
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800449 .ret = NOT_DONE,
Max Reitzcc84d902013-09-06 17:14:26 +0200450 .err = NULL,
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800451 };
452
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800453 if (!drv->bdrv_create) {
Max Reitzcc84d902013-09-06 17:14:26 +0200454 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
Luiz Capitulino80168bf2012-10-17 16:45:25 -0300455 ret = -ENOTSUP;
456 goto out;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800457 }
458
459 if (qemu_in_coroutine()) {
460 /* Fast-path if already in coroutine context */
461 bdrv_create_co_entry(&cco);
462 } else {
463 co = qemu_coroutine_create(bdrv_create_co_entry);
464 qemu_coroutine_enter(co, &cco);
465 while (cco.ret == NOT_DONE) {
Paolo Bonzinib47ec2c2014-07-07 15:18:01 +0200466 aio_poll(qemu_get_aio_context(), true);
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800467 }
468 }
469
470 ret = cco.ret;
Max Reitzcc84d902013-09-06 17:14:26 +0200471 if (ret < 0) {
Markus Armbruster84d18f02014-01-30 15:07:28 +0100472 if (cco.err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200473 error_propagate(errp, cco.err);
474 } else {
475 error_setg_errno(errp, -ret, "Could not create image");
476 }
477 }
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800478
Luiz Capitulino80168bf2012-10-17 16:45:25 -0300479out:
480 g_free(cco.filename);
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800481 return ret;
bellardea2384d2004-08-01 21:59:26 +0000482}
483
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800484int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200485{
486 BlockDriver *drv;
Max Reitzcc84d902013-09-06 17:14:26 +0200487 Error *local_err = NULL;
488 int ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200489
Kevin Wolf98289622013-07-10 15:47:39 +0200490 drv = bdrv_find_protocol(filename, true);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200491 if (drv == NULL) {
Max Reitzcc84d902013-09-06 17:14:26 +0200492 error_setg(errp, "Could not find protocol for file '%s'", filename);
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000493 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200494 }
495
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800496 ret = bdrv_create(drv, filename, opts, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100497 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200498 error_propagate(errp, local_err);
499 }
500 return ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200501}
502
Kevin Wolf3baca892014-07-16 17:48:16 +0200503void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
Kevin Wolfd34682c2013-12-11 19:26:16 +0100504{
505 BlockDriver *drv = bs->drv;
Kevin Wolf3baca892014-07-16 17:48:16 +0200506 Error *local_err = NULL;
Kevin Wolfd34682c2013-12-11 19:26:16 +0100507
508 memset(&bs->bl, 0, sizeof(bs->bl));
509
Kevin Wolf466ad822013-12-11 19:50:32 +0100510 if (!drv) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200511 return;
Kevin Wolf466ad822013-12-11 19:50:32 +0100512 }
513
514 /* Take some limits from the children as a default */
515 if (bs->file) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200516 bdrv_refresh_limits(bs->file, &local_err);
517 if (local_err) {
518 error_propagate(errp, local_err);
519 return;
520 }
Kevin Wolf466ad822013-12-11 19:50:32 +0100521 bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
Peter Lieven2647fab2014-10-27 10:18:44 +0100522 bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
Kevin Wolf339064d2013-11-28 10:23:32 +0100523 bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
524 } else {
525 bs->bl.opt_mem_alignment = 512;
Kevin Wolf466ad822013-12-11 19:50:32 +0100526 }
527
528 if (bs->backing_hd) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200529 bdrv_refresh_limits(bs->backing_hd, &local_err);
530 if (local_err) {
531 error_propagate(errp, local_err);
532 return;
533 }
Kevin Wolf466ad822013-12-11 19:50:32 +0100534 bs->bl.opt_transfer_length =
535 MAX(bs->bl.opt_transfer_length,
536 bs->backing_hd->bl.opt_transfer_length);
Peter Lieven2647fab2014-10-27 10:18:44 +0100537 bs->bl.max_transfer_length =
538 MIN_NON_ZERO(bs->bl.max_transfer_length,
539 bs->backing_hd->bl.max_transfer_length);
Kevin Wolf339064d2013-11-28 10:23:32 +0100540 bs->bl.opt_mem_alignment =
541 MAX(bs->bl.opt_mem_alignment,
542 bs->backing_hd->bl.opt_mem_alignment);
Kevin Wolf466ad822013-12-11 19:50:32 +0100543 }
544
545 /* Then let the driver override it */
546 if (drv->bdrv_refresh_limits) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200547 drv->bdrv_refresh_limits(bs, errp);
Kevin Wolfd34682c2013-12-11 19:26:16 +0100548 }
Kevin Wolfd34682c2013-12-11 19:26:16 +0100549}
550
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int len;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0) {
        /* snprintf itself failed; the buffer contents are indeterminate,
         * so we must not hand them to mkstemp().  (The old check
         * "snprintf(...) >= size" let a negative error return fall
         * through as success.) */
        return -EINVAL;
    }
    if (len >= size) {
        /* template would be truncated */
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    /* The file stays on disk; only the descriptor is closed here.
     * A failed close still invalidates the result, so clean up. */
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}
bellardea2384d2004-08-01 21:59:26 +0000586
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200587/*
588 * Detect host devices. By convention, /dev/cdrom[N] is always
589 * recognized as a host CDROM.
590 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200591static BlockDriver *find_hdev_driver(const char *filename)
592{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200593 int score_max = 0, score;
594 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200595
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100596 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200597 if (d->bdrv_probe_device) {
598 score = d->bdrv_probe_device(filename);
599 if (score > score_max) {
600 score_max = score;
601 drv = d;
602 }
603 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200604 }
605
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200606 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200607}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200608
Kevin Wolf98289622013-07-10 15:47:39 +0200609BlockDriver *bdrv_find_protocol(const char *filename,
610 bool allow_protocol_prefix)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200611{
612 BlockDriver *drv1;
613 char protocol[128];
614 int len;
615 const char *p;
616
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200617 /* TODO Drivers without bdrv_file_open must be specified explicitly */
618
Christoph Hellwig39508e72010-06-23 12:25:17 +0200619 /*
620 * XXX(hch): we really should not let host device detection
621 * override an explicit protocol specification, but moving this
622 * later breaks access to device names with colons in them.
623 * Thanks to the brain-dead persistent naming schemes on udev-
624 * based Linux systems those actually are quite common.
625 */
626 drv1 = find_hdev_driver(filename);
627 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200628 return drv1;
629 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200630
Kevin Wolf98289622013-07-10 15:47:39 +0200631 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200632 return bdrv_find_format("file");
633 }
Kevin Wolf98289622013-07-10 15:47:39 +0200634
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000635 p = strchr(filename, ':');
636 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200637 len = p - filename;
638 if (len > sizeof(protocol) - 1)
639 len = sizeof(protocol) - 1;
640 memcpy(protocol, filename, len);
641 protocol[len] = '\0';
642 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
643 if (drv1->protocol_name &&
644 !strcmp(drv1->protocol_name, protocol)) {
645 return drv1;
646 }
647 }
648 return NULL;
649}
650
Kevin Wolff500a6d2012-11-12 17:35:27 +0100651static int find_image_format(BlockDriverState *bs, const char *filename,
Max Reitz34b5d2c2013-09-05 14:45:29 +0200652 BlockDriver **pdrv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000653{
Kevin Wolff500a6d2012-11-12 17:35:27 +0100654 int score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000655 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000656 uint8_t buf[2048];
Kevin Wolff500a6d2012-11-12 17:35:27 +0100657 int ret = 0;
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700658
Kevin Wolf08a00552010-06-01 18:37:31 +0200659 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
Paolo Bonzini8e895592013-01-10 15:39:27 +0100660 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200661 drv = bdrv_find_format("raw");
662 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200663 error_setg(errp, "Could not find raw image format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200664 ret = -ENOENT;
665 }
666 *pdrv = drv;
667 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700668 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700669
bellard83f64092006-08-01 16:21:11 +0000670 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
bellard83f64092006-08-01 16:21:11 +0000671 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200672 error_setg_errno(errp, -ret, "Could not read image for determining its "
673 "format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200674 *pdrv = NULL;
675 return ret;
bellard83f64092006-08-01 16:21:11 +0000676 }
677
bellardea2384d2004-08-01 21:59:26 +0000678 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200679 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100680 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000681 if (drv1->bdrv_probe) {
682 score = drv1->bdrv_probe(buf, ret, filename);
683 if (score > score_max) {
684 score_max = score;
685 drv = drv1;
686 }
bellardea2384d2004-08-01 21:59:26 +0000687 }
688 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200689 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200690 error_setg(errp, "Could not determine image format: No compatible "
691 "driver found");
Stefan Weilc98ac352010-07-21 21:51:51 +0200692 ret = -ENOENT;
693 }
694 *pdrv = drv;
695 return ret;
bellardea2384d2004-08-01 21:59:26 +0000696}
697
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100698/**
699 * Set the current 'total_sectors' value
Markus Armbruster65a9bb22014-06-26 13:23:17 +0200700 * Return 0 on success, -errno on error.
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100701 */
702static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
703{
704 BlockDriver *drv = bs->drv;
705
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700706 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
707 if (bs->sg)
708 return 0;
709
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100710 /* query actual device if possible, otherwise just trust the hint */
711 if (drv->bdrv_getlength) {
712 int64_t length = drv->bdrv_getlength(bs);
713 if (length < 0) {
714 return length;
715 }
Fam Zheng7e382002013-11-06 19:48:06 +0800716 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100717 }
718
719 bs->total_sectors = hint;
720 return 0;
721}
722
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100723/**
Paolo Bonzini9e8f1832013-02-08 14:06:11 +0100724 * Set open flags for a given discard mode
725 *
726 * Return 0 on success, -1 if the discard mode was invalid.
727 */
728int bdrv_parse_discard_flags(const char *mode, int *flags)
729{
730 *flags &= ~BDRV_O_UNMAP;
731
732 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
733 /* do nothing */
734 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
735 *flags |= BDRV_O_UNMAP;
736 } else {
737 return -1;
738 }
739
740 return 0;
741}
742
743/**
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100744 * Set open flags for a given cache mode
745 *
746 * Return 0 on success, -1 if the cache mode was invalid.
747 */
748int bdrv_parse_cache_flags(const char *mode, int *flags)
749{
750 *flags &= ~BDRV_O_CACHE_MASK;
751
752 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
753 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100754 } else if (!strcmp(mode, "directsync")) {
755 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100756 } else if (!strcmp(mode, "writeback")) {
757 *flags |= BDRV_O_CACHE_WB;
758 } else if (!strcmp(mode, "unsafe")) {
759 *flags |= BDRV_O_CACHE_WB;
760 *flags |= BDRV_O_NO_FLUSH;
761 } else if (!strcmp(mode, "writethrough")) {
762 /* this is the default */
763 } else {
764 return -1;
765 }
766
767 return 0;
768}
769
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000770/**
771 * The copy-on-read flag is actually a reference count so multiple users may
772 * use the feature without worrying about clobbering its previous state.
773 * Copy-on-read stays enabled until all users have called to disable it.
774 */
775void bdrv_enable_copy_on_read(BlockDriverState *bs)
776{
777 bs->copy_on_read++;
778}
779
780void bdrv_disable_copy_on_read(BlockDriverState *bs)
781{
782 assert(bs->copy_on_read > 0);
783 bs->copy_on_read--;
784}
785
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200786/*
Kevin Wolfb1e6fc02014-05-06 12:11:42 +0200787 * Returns the flags that a temporary snapshot should get, based on the
788 * originally requested flags (the originally requested image will have flags
789 * like a backing file)
790 */
791static int bdrv_temp_snapshot_flags(int flags)
792{
793 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
794}
795
796/*
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200797 * Returns the flags that bs->file should get, based on the given flags for
798 * the parent BDS
799 */
800static int bdrv_inherited_flags(int flags)
801{
802 /* Enable protocol handling, disable format probing for bs->file */
803 flags |= BDRV_O_PROTOCOL;
804
805 /* Our block drivers take care to send flushes and respect unmap policy,
806 * so we can enable both unconditionally on lower layers. */
807 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
808
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200809 /* Clear flags that only apply to the top layer */
Kevin Wolf5669b442014-04-11 21:36:45 +0200810 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200811
812 return flags;
813}
814
Kevin Wolf317fc442014-04-25 13:27:34 +0200815/*
816 * Returns the flags that bs->backing_hd should get, based on the given flags
817 * for the parent BDS
818 */
819static int bdrv_backing_flags(int flags)
820{
821 /* backing files always opened read-only */
822 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
823
824 /* snapshot=on is handled on the top layer */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200825 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
Kevin Wolf317fc442014-04-25 13:27:34 +0200826
827 return flags;
828}
829
Kevin Wolf7b272452012-11-12 17:05:39 +0100830static int bdrv_open_flags(BlockDriverState *bs, int flags)
831{
832 int open_flags = flags | BDRV_O_CACHE_WB;
833
834 /*
835 * Clear flags that are internal to the block layer before opening the
836 * image.
837 */
Kevin Wolf20cca272014-06-04 14:33:27 +0200838 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
Kevin Wolf7b272452012-11-12 17:05:39 +0100839
840 /*
841 * Snapshots should be writable.
842 */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200843 if (flags & BDRV_O_TEMPORARY) {
Kevin Wolf7b272452012-11-12 17:05:39 +0100844 open_flags |= BDRV_O_RDWR;
845 }
846
847 return open_flags;
848}
849
/*
 * Give @bs the node name @node_name and insert it into the named-node list.
 *
 * A NULL @node_name is allowed: the BDS simply stays anonymous.  On any
 * validation failure @errp is set and @bs is left without a node name.
 * Note the check order determines which error message wins when several
 * conditions apply (well-formedness, then device-id clash, then duplicate).
 */
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* Check for empty string or invalid characters */
    if (!id_wellformed(node_name)) {
        error_setg(errp, "Invalid node name");
        return;
    }

    /* takes care of avoiding namespaces collisions: node names must not
     * shadow BlockBackend (device) names */
    if (blk_by_name(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicates node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}
881
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @file is the protocol-level BDS below this one, or NULL when @drv itself
 * is a protocol driver.  Returns 0 on success; on failure, sets @errp and
 * returns a negative errno, undoing the partial initialization of @bs
 * (drv/opaque/file are reset to NULL in free_and_fail).
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Prefer the protocol layer's filename; fall back to the options dict. */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* node-name is consumed here, before the driver sees the options */
    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    /* Default geometry/alignment values; drivers may refine them below. */
    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);
    /* only protocol layers are growable */
    bs->growable = !!(flags & BDRV_O_PROTOCOL);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                   ? "Driver '%s' can only be used for read-only devices"
                   : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    /* per-driver private state, freed again in free_and_fail/close */
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error message if it provided one. */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    /* Undo the partial initialization so @bs can be reused or closed. */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
1019
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02001020static QDict *parse_json_filename(const char *filename, Error **errp)
1021{
1022 QObject *options_obj;
1023 QDict *options;
1024 int ret;
1025
1026 ret = strstart(filename, "json:", &filename);
1027 assert(ret);
1028
1029 options_obj = qobject_from_json(filename);
1030 if (!options_obj) {
1031 error_setg(errp, "Could not parse the JSON options");
1032 return NULL;
1033 }
1034
1035 if (qobject_type(options_obj) != QTYPE_QDICT) {
1036 qobject_decref(options_obj);
1037 error_setg(errp, "Invalid JSON object given");
1038 return NULL;
1039 }
1040
1041 options = qobject_to_qdict(options_obj);
1042 qdict_flatten(options);
1043
1044 return options;
1045}
1046
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 *
 * On return, *options contains a "driver" entry whenever one could be
 * determined, and (for protocol BDSes) a "filename" entry.  *pfilename is
 * set to NULL when a json: pseudo-filename was consumed.  Returns 0 on
 * success, -EINVAL/-ENOENT with @errp set on error.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* remember to let the driver parse the legacy filename later */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        /* An explicit driver argument and a "driver" option must not clash */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            /* Protocol layer without explicit driver: probe from filename */
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename);
                if (!drv) {
                    error_setg(errp, "Unknown protocol");
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* For protocol BDSes a driver must have been determined by now;
     * format probing for non-protocol BDSes happens later in bdrv_open(). */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
1139
/*
 * Install @backing_hd as the backing file of @bs (or detach the current one
 * when @backing_hd is NULL), keeping the op blocker on the backing BDS and
 * the cached backing filename/format in sync.  Always refreshes bs's limits
 * before returning.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        /* Release the blocker held on the previous backing file. */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        /* First backing file: create the blocker error object once. */
        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bdrv_get_device_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        /* Detached: the blocker object is no longer needed. */
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}
1170
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 *
 * Returns 0 if no backing file needed to be opened or it was opened
 * successfully; a negative errno with @errp set otherwise.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriver *back_drv = NULL;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    /* Already open: nothing to do (still consumes the options reference). */
    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* The filename comes from the options dict, not bs->backing_file. */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* Neither a filename nor options: there is no backing file. */
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    /* Use the recorded backing format, if any, instead of probing. */
    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    assert(bs->backing_hd == NULL);
    /* bdrv_open consumes the options reference, even on failure. */
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}
1239
/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
 *
 * If allow_none is true, no image will be opened if filename is false and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 *
 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * The BlockdevRef will be removed from the options QDict.
 *
 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
 */
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                    QDict *options, const char *bdref_key, int flags,
                    bool allow_none, Error **errp)
{
    QDict *image_options;
    int ret;
    char *bdref_key_dot;
    const char *reference;

    assert(pbs);
    assert(*pbs == NULL);

    /* Pull all "${bdref_key}."-prefixed options into their own dict. */
    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
    g_free(bdref_key_dot);

    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
        /* Nothing identifies an image: succeed or fail per allow_none. */
        if (allow_none) {
            ret = 0;
        } else {
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
            ret = -EINVAL;
        }
        /* bdrv_open() below would have consumed this reference */
        QDECREF(image_options);
        goto done;
    }

    ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);

done:
    /* The BlockdevRef key is consumed in every case. */
    qdict_del(options, bdref_key);
    return ret;
}
1291
Chen Gang6b8aeca2014-06-23 23:28:23 +08001292int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001293{
1294 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001295 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001296 int64_t total_size;
1297 BlockDriver *bdrv_qcow2;
Chunyan Liu83d05212014-06-05 17:20:51 +08001298 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001299 QDict *snapshot_options;
1300 BlockDriverState *bs_snapshot;
1301 Error *local_err;
1302 int ret;
1303
1304 /* if snapshot, we create a temporary backing file and open it
1305 instead of opening 'filename' directly */
1306
1307 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001308 total_size = bdrv_getlength(bs);
1309 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001310 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001311 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001312 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001313 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001314
1315 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001316 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001317 if (ret < 0) {
1318 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001319 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001320 }
1321
1322 bdrv_qcow2 = bdrv_find_format("qcow2");
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001323 opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
1324 &error_abort);
Chunyan Liu83d05212014-06-05 17:20:51 +08001325 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001326 ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001327 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001328 if (ret < 0) {
1329 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1330 "'%s': %s", tmp_filename,
1331 error_get_pretty(local_err));
1332 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001333 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001334 }
1335
1336 /* Prepare a new options QDict for the temporary file */
1337 snapshot_options = qdict_new();
1338 qdict_put(snapshot_options, "file.driver",
1339 qstring_from_str("file"));
1340 qdict_put(snapshot_options, "file.filename",
1341 qstring_from_str(tmp_filename));
1342
Markus Armbrustere4e99862014-10-07 13:59:03 +02001343 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001344
1345 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001346 flags, bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001347 if (ret < 0) {
1348 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001349 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001350 }
1351
1352 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001353
1354out:
1355 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001356 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001357}
1358
Max Reitzda557aa2013-12-20 19:28:11 +01001359/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001360 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001361 *
1362 * options is a QDict of options to pass to the block drivers, or NULL for an
1363 * empty set of options. The reference to the QDict belongs to the block layer
1364 * after the call (even on failure), so if the caller intends to reuse the
1365 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001366 *
1367 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1368 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001369 *
1370 * The reference parameter may be used to specify an existing block device which
1371 * should be opened. If specified, neither options nor a filename may be given,
1372 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001373 */
Max Reitzddf56362014-02-18 18:33:06 +01001374int bdrv_open(BlockDriverState **pbs, const char *filename,
1375 const char *reference, QDict *options, int flags,
1376 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001377{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001378 int ret;
Max Reitzf67503e2014-02-18 18:33:05 +01001379 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001380 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001381 Error *local_err = NULL;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001382 int snapshot_flags = 0;
bellard712e7872005-04-28 21:09:32 +00001383
Max Reitzf67503e2014-02-18 18:33:05 +01001384 assert(pbs);
1385
Max Reitzddf56362014-02-18 18:33:06 +01001386 if (reference) {
1387 bool options_non_empty = options ? qdict_size(options) : false;
1388 QDECREF(options);
1389
1390 if (*pbs) {
1391 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1392 "another block device");
1393 return -EINVAL;
1394 }
1395
1396 if (filename || options_non_empty) {
1397 error_setg(errp, "Cannot reference an existing block device with "
1398 "additional options or a new filename");
1399 return -EINVAL;
1400 }
1401
1402 bs = bdrv_lookup_bs(reference, reference, errp);
1403 if (!bs) {
1404 return -ENODEV;
1405 }
1406 bdrv_ref(bs);
1407 *pbs = bs;
1408 return 0;
1409 }
1410
Max Reitzf67503e2014-02-18 18:33:05 +01001411 if (*pbs) {
1412 bs = *pbs;
1413 } else {
Markus Armbrustere4e99862014-10-07 13:59:03 +02001414 bs = bdrv_new();
Max Reitzf67503e2014-02-18 18:33:05 +01001415 }
1416
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001417 /* NULL means an empty set of options */
1418 if (options == NULL) {
1419 options = qdict_new();
1420 }
1421
Kevin Wolf17b005f2014-05-27 10:50:29 +02001422 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
Kevin Wolf462f5bc2014-05-26 11:39:55 +02001423 if (local_err) {
1424 goto fail;
1425 }
1426
Kevin Wolf76c591b2014-06-04 14:19:44 +02001427 /* Find the right image format driver */
1428 drv = NULL;
1429 drvname = qdict_get_try_str(options, "driver");
1430 if (drvname) {
1431 drv = bdrv_find_format(drvname);
1432 qdict_del(options, "driver");
1433 if (!drv) {
1434 error_setg(errp, "Unknown driver: '%s'", drvname);
1435 ret = -EINVAL;
1436 goto fail;
1437 }
1438 }
1439
1440 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1441 if (drv && !drv->bdrv_file_open) {
1442 /* If the user explicitly wants a format driver here, we'll need to add
1443 * another layer for the protocol in bs->file */
1444 flags &= ~BDRV_O_PROTOCOL;
1445 }
1446
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001447 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001448 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001449
Kevin Wolff4788ad2014-06-03 16:44:19 +02001450 /* Open image file without format layer */
1451 if ((flags & BDRV_O_PROTOCOL) == 0) {
1452 if (flags & BDRV_O_RDWR) {
1453 flags |= BDRV_O_ALLOW_RDWR;
1454 }
1455 if (flags & BDRV_O_SNAPSHOT) {
1456 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1457 flags = bdrv_backing_flags(flags);
1458 }
1459
1460 assert(file == NULL);
1461 ret = bdrv_open_image(&file, filename, options, "file",
1462 bdrv_inherited_flags(flags),
1463 true, &local_err);
1464 if (ret < 0) {
Max Reitz5469a2a2014-02-18 18:33:10 +01001465 goto fail;
1466 }
1467 }
1468
Kevin Wolf76c591b2014-06-04 14:19:44 +02001469 /* Image format probing */
1470 if (!drv && file) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001471 ret = find_image_format(file, filename, &drv, &local_err);
1472 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001473 goto fail;
Max Reitz2a05cbe2013-12-20 19:28:10 +01001474 }
Kevin Wolf76c591b2014-06-04 14:19:44 +02001475 } else if (!drv) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001476 error_setg(errp, "Must specify either driver or file");
1477 ret = -EINVAL;
Kevin Wolf8bfea152014-04-11 19:16:36 +02001478 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001479 }
1480
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001481 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001482 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001483 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001484 goto fail;
Christoph Hellwig69873072010-01-20 18:13:25 +01001485 }
1486
Max Reitz2a05cbe2013-12-20 19:28:10 +01001487 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001488 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001489 file = NULL;
1490 }
1491
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001492 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001493 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001494 QDict *backing_options;
1495
Benoît Canet5726d872013-09-25 13:30:01 +02001496 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001497 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001498 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001499 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001500 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001501 }
1502
Max Reitz91af7012014-07-18 20:24:56 +02001503 bdrv_refresh_filename(bs);
1504
Kevin Wolfb9988752014-04-03 12:09:34 +02001505 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1506 * temporary snapshot afterwards. */
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001507 if (snapshot_flags) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001508 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001509 if (local_err) {
Kevin Wolfb9988752014-04-03 12:09:34 +02001510 goto close_and_fail;
1511 }
1512 }
1513
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001514 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001515 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001516 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001517 if (flags & BDRV_O_PROTOCOL) {
1518 error_setg(errp, "Block protocol '%s' doesn't support the option "
1519 "'%s'", drv->format_name, entry->key);
1520 } else {
1521 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1522 "support the option '%s'", drv->format_name,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001523 bdrv_get_device_name(bs), entry->key);
Max Reitz5acd9d82014-02-18 18:33:11 +01001524 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001525
1526 ret = -EINVAL;
1527 goto close_and_fail;
1528 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001529
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001530 if (!bdrv_key_required(bs)) {
Markus Armbrustera7f53e22014-10-07 13:59:25 +02001531 if (bs->blk) {
1532 blk_dev_change_media_cb(bs->blk, true);
1533 }
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001534 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1535 && !runstate_check(RUN_STATE_INMIGRATE)
1536 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1537 error_setg(errp,
1538 "Guest must be stopped for opening of encrypted image");
1539 ret = -EBUSY;
1540 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001541 }
1542
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001543 QDECREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001544 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001545 return 0;
1546
Kevin Wolf8bfea152014-04-11 19:16:36 +02001547fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001548 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001549 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001550 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001551 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001552 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001553 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001554 if (!*pbs) {
1555 /* If *pbs is NULL, a new BDS has been created in this function and
1556 needs to be freed now. Otherwise, it does not need to be closed,
1557 since it has not really been opened yet. */
1558 bdrv_unref(bs);
1559 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001560 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001561 error_propagate(errp, local_err);
1562 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001563 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001564
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001565close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001566 /* See fail path, but now the BDS has to be always closed */
1567 if (*pbs) {
1568 bdrv_close(bs);
1569 } else {
1570 bdrv_unref(bs);
1571 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001572 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001573 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001574 error_propagate(errp, local_err);
1575 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001576 return ret;
1577}
1578
/* One element of a BlockReopenQueue: tracks the staged reopen state for a
 * single BlockDriverState and whether its prepare phase has completed. */
typedef struct BlockReopenQueueEntry {
     bool prepared;    /* true once bdrv_reopen_prepare() succeeded for this
                        * entry; only prepared entries are aborted on rollback */
     BDRVReopenState state;    /* staged state consumed by commit/abort */
     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;    /* link in the queue */
} BlockReopenQueueEntry;
1584
1585/*
1586 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1587 * reopen of multiple devices.
1588 *
1589 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1590 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1591 * be created and initialized. This newly created BlockReopenQueue should be
1592 * passed back in for subsequent calls that are intended to be of the same
1593 * atomic 'set'.
1594 *
1595 * bs is the BlockDriverState to add to the reopen queue.
1596 *
1597 * flags contains the open flags for the associated bs
1598 *
1599 * returns a pointer to bs_queue, which is either the newly allocated
1600 * bs_queue, or the existing bs_queue being used.
1601 *
1602 */
1603BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1604 BlockDriverState *bs, int flags)
1605{
1606 assert(bs != NULL);
1607
1608 BlockReopenQueueEntry *bs_entry;
1609 if (bs_queue == NULL) {
1610 bs_queue = g_new0(BlockReopenQueue, 1);
1611 QSIMPLEQ_INIT(bs_queue);
1612 }
1613
Kevin Wolff1f25a22014-04-25 19:04:55 +02001614 /* bdrv_open() masks this flag out */
1615 flags &= ~BDRV_O_PROTOCOL;
1616
Jeff Codye971aa12012-09-20 15:13:19 -04001617 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001618 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001619 }
1620
1621 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1622 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1623
1624 bs_entry->state.bs = bs;
1625 bs_entry->state.flags = flags;
1626
1627 return bs_queue;
1628}
1629
/*
 * Reopen multiple BlockDriverStates atomically & transactionally.
 *
 * The queue passed in (bs_queue) must have been built up previously
 * via bdrv_reopen_queue().
 *
 * Reopens all BDS specified in the queue, with the appropriate
 * flags. All devices are prepared for reopen, and failure of any
 * device will cause all device changes to be abandoned, and intermediate
 * data cleaned up.
 *
 * If all devices prepare successfully, then the changes are committed
 * to all devices.
 *
 * Returns 0 on success, otherwise the negative value left in 'ret' by the
 * failing prepare step; errp is set by the failing bdrv_reopen_prepare().
 */
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
    int ret = -1;
    BlockReopenQueueEntry *bs_entry, *next;
    Error *local_err = NULL;

    assert(bs_queue != NULL);

    /* Quiesce all in-flight I/O before touching any device */
    bdrv_drain_all();

    /* Phase 1: prepare every entry; on first failure fall through to
     * cleanup, which aborts all entries prepared so far (ret != 0). */
    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
            error_propagate(errp, local_err);
            goto cleanup;
        }
        bs_entry->prepared = true;
    }

    /* If we reach this point, we have success and just need to apply the
     * changes
     */
    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        bdrv_reopen_commit(&bs_entry->state);
    }

    ret = 0;

cleanup:
    /* On both paths the queue itself is consumed and freed; aborts only
     * happen for prepared entries of a failed transaction. */
    QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
        if (ret && bs_entry->prepared) {
            bdrv_reopen_abort(&bs_entry->state);
        }
        g_free(bs_entry);
    }
    g_free(bs_queue);
    return ret;
}
1682
1683
1684/* Reopen a single BlockDriverState with the specified flags. */
1685int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1686{
1687 int ret = -1;
1688 Error *local_err = NULL;
1689 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1690
1691 ret = bdrv_reopen_multiple(queue, &local_err);
1692 if (local_err != NULL) {
1693 error_propagate(errp, local_err);
1694 }
1695 return ret;
1696}
1697
1698
/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * bs is the BlockDriverState to reopen
 * flags are the new open flags
 * queue is the reopen queue
 *
 * Returns 0 on success, non-zero on error. On error errp will be set
 * as well.
 *
 * On failure, bdrv_reopen_abort() will be called to clean up any data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockDriver *drv;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* if we are to stay read-only, do not allow permission change
     * to r/w */
    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
        reopen_state->flags & BDRV_O_RDWR) {
        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
                  bdrv_get_device_name(reopen_state->bs));
        goto error;
    }


    /* Flush outstanding data so the image is consistent before the
     * driver re-reads or re-validates anything during prepare */
    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
                  strerror(-ret));
        goto error;
    }

    /* Delegate to the driver; it stages its changes in reopen_state and
     * must report an error either via local_err or its return value */
    if (drv->bdrv_reopen_prepare) {
        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                /* Driver returned failure without setting an error */
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
                  drv->format_name, bdrv_get_device_name(reopen_state->bs),
                  "reopening of file");
        ret = -1;
        goto error;
    }

    ret = 0;

error:
    return ret;
}
1770
1771/*
1772 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1773 * makes them final by swapping the staging BlockDriverState contents into
1774 * the active BlockDriverState contents.
1775 */
1776void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1777{
1778 BlockDriver *drv;
1779
1780 assert(reopen_state != NULL);
1781 drv = reopen_state->bs->drv;
1782 assert(drv != NULL);
1783
1784 /* If there are any driver level actions to take */
1785 if (drv->bdrv_reopen_commit) {
1786 drv->bdrv_reopen_commit(reopen_state);
1787 }
1788
1789 /* set BDS specific flags now */
1790 reopen_state->bs->open_flags = reopen_state->flags;
1791 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1792 BDRV_O_CACHE_WB);
1793 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001794
Kevin Wolf3baca892014-07-16 17:48:16 +02001795 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001796}
1797
1798/*
1799 * Abort the reopen, and delete and free the staged changes in
1800 * reopen_state
1801 */
1802void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1803{
1804 BlockDriver *drv;
1805
1806 assert(reopen_state != NULL);
1807 drv = reopen_state->bs->drv;
1808 assert(drv != NULL);
1809
1810 if (drv->bdrv_reopen_abort) {
1811 drv->bdrv_reopen_abort(reopen_state);
1812 }
1813}
1814
1815
/* Tear down an open BlockDriverState: cancel its job, drain I/O, detach
 * the backing chain, close the driver, and release per-BDS resources.
 * The BDS object itself is not freed (see bdrv_delete()). */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    /* A running block job holds references into bs; stop it first */
    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        /* Detach and drop the backing file before closing the driver */
        if (bs->backing_hd) {
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        /* Reset per-open state so the BDS can be reused by a later open */
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->growable = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;

        /* Drop the protocol layer below us, if any */
        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    /* Tell the attached device model the medium is gone */
    if (bs->blk) {
        blk_dev_change_media_cb(bs->blk, false);
    }

    /* Disable throttling of disk I/O limits, if enabled */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }

    /* Free all registered AioContext-change notifiers */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}
1872
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001873void bdrv_close_all(void)
1874{
1875 BlockDriverState *bs;
1876
Benoît Canetdc364f42014-01-23 21:31:32 +01001877 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001878 AioContext *aio_context = bdrv_get_aio_context(bs);
1879
1880 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001881 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001882 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001883 }
1884}
1885
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001886/* Check if any requests are in-flight (including throttled requests) */
1887static bool bdrv_requests_pending(BlockDriverState *bs)
1888{
1889 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1890 return true;
1891 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001892 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1893 return true;
1894 }
1895 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001896 return true;
1897 }
1898 if (bs->file && bdrv_requests_pending(bs->file)) {
1899 return true;
1900 }
1901 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1902 return true;
1903 }
1904 return false;
1905}
1906
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001907/*
1908 * Wait for pending requests to complete across all BlockDriverStates
1909 *
1910 * This function does not flush data to disk, use bdrv_flush_all() for that
1911 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001912 *
1913 * Note that completion of an asynchronous I/O operation can trigger any
1914 * number of other I/O operations on other devices---for example a coroutine
1915 * can be arbitrarily complex and a constant flow of I/O can come until the
1916 * coroutine is complete. Because of this, it is not possible to have a
1917 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001918 */
1919void bdrv_drain_all(void)
1920{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001921 /* Always run first iteration so any pending completion BHs run */
1922 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001923 BlockDriverState *bs;
1924
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001925 while (busy) {
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001926 busy = false;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001927
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001928 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1929 AioContext *aio_context = bdrv_get_aio_context(bs);
1930 bool bs_busy;
1931
1932 aio_context_acquire(aio_context);
Ming Lei448ad912014-07-04 18:04:33 +08001933 bdrv_flush_io_queue(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001934 bdrv_start_throttled_reqs(bs);
1935 bs_busy = bdrv_requests_pending(bs);
1936 bs_busy |= aio_poll(aio_context, bs_busy);
1937 aio_context_release(aio_context);
1938
1939 busy |= bs_busy;
1940 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001941 }
1942}
1943
Benoît Canetdc364f42014-01-23 21:31:32 +01001944/* make a BlockDriverState anonymous by removing from bdrv_state and
1945 * graph_bdrv_state list.
Ryan Harperd22b2f42011-03-29 20:51:47 -05001946 Also, NULL terminate the device_name to prevent double remove */
1947void bdrv_make_anon(BlockDriverState *bs)
1948{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001949 /*
1950 * Take care to remove bs from bdrv_states only when it's actually
1951 * in it. Note that bs->device_list.tqe_prev is initially null,
1952 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1953 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1954 * resetting it to null on remove.
1955 */
1956 if (bs->device_list.tqe_prev) {
Benoît Canetdc364f42014-01-23 21:31:32 +01001957 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001958 bs->device_list.tqe_prev = NULL;
Ryan Harperd22b2f42011-03-29 20:51:47 -05001959 }
Benoît Canetdc364f42014-01-23 21:31:32 +01001960 if (bs->node_name[0] != '\0') {
1961 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1962 }
1963 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05001964}
1965
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001966static void bdrv_rebind(BlockDriverState *bs)
1967{
1968 if (bs->drv && bs->drv->bdrv_rebind) {
1969 bs->drv->bdrv_rebind(bs);
1970 }
1971}
1972
/* Copy from bs_src to bs_dest the fields that must stay attached to the
 * guest-visible device rather than follow the image contents in a
 * bdrv_swap().  Called three times by bdrv_swap() to shuffle these fields
 * back to their original owners after the wholesale struct swap. */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->guest_block_size = bs_src->guest_block_size;
    bs_dest->copy_on_read = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o throttled req */
    memcpy(&bs_dest->throttle_state,
           &bs_src->throttle_state,
           sizeof(ThrottleState));
    bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
    bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
    bs_dest->io_limits_enabled = bs_src->io_limits_enabled;

    /* r/w error handling policy */
    bs_dest->on_read_error = bs_src->on_read_error;
    bs_dest->on_write_error = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
    bs_dest->iostatus = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;

    /* reference count */
    bs_dest->refcnt = bs_src->refcnt;

    /* job */
    bs_dest->job = bs_src->job;

    /* keep the same entry in bdrv_states and the same BlockBackend link */
    bs_dest->device_list = bs_src->device_list;
    bs_dest->blk = bs_src->blk;

    /* operation blockers stay with the device as well */
    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
           sizeof(bs_dest->op_blockers));
}
2016
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be unattached and shouldn't have anything fancy enabled */
    assert(!bs_new->blk);
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* Wholesale struct swap ... */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new must remain unattached */
    assert(!bs_new->blk);

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
2079
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    bdrv_swap(bs_new, bs_top);

    /* After the swap, bs_top carries the new top-layer contents and bs_new
     * holds what used to be the top; link bs_new in as bs_top's backing
     * file to complete the chain. */
    bdrv_set_backing_hd(bs_top, bs_new);
}
2099
/* Free a BlockDriverState whose refcount has dropped to zero.  Callers
 * must have released jobs, op blockers, and dirty bitmaps beforehand. */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    /* Release driver state and lower layers first */
    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    g_free(bs);
}
2114
aliguorie97fc192009-04-21 23:11:50 +00002115/*
2116 * Run consistency checks on an image
2117 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002118 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002119 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002120 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002121 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002122int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002123{
Max Reitz908bcd52014-08-07 22:47:55 +02002124 if (bs->drv == NULL) {
2125 return -ENOMEDIUM;
2126 }
aliguorie97fc192009-04-21 23:11:50 +00002127 if (bs->drv->bdrv_check == NULL) {
2128 return -ENOTSUP;
2129 }
2130
Kevin Wolfe076f332010-06-29 11:43:13 +02002131 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002132 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002133}
2134
Kevin Wolf8a426612010-07-16 17:17:01 +02002135#define COMMIT_BUF_SECTORS 2048
2136
bellard33e39632003-07-06 17:15:21 +00002137/* commit COW file into the raw image */
2138int bdrv_commit(BlockDriverState *bs)
2139{
bellard19cb3732006-08-19 11:45:59 +00002140 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002141 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002142 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002143 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002144 uint8_t *buf = NULL;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002145 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00002146
bellard19cb3732006-08-19 11:45:59 +00002147 if (!drv)
2148 return -ENOMEDIUM;
Liu Yuan6bb45152014-09-01 13:35:21 +08002149
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002150 if (!bs->backing_hd) {
2151 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002152 }
2153
Fam Zheng3718d8a2014-05-23 21:29:43 +08002154 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
2155 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002156 return -EBUSY;
2157 }
2158
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002159 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002160 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2161 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002162 open_flags = bs->backing_hd->open_flags;
2163
2164 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002165 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2166 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002167 }
bellard33e39632003-07-06 17:15:21 +00002168 }
bellardea2384d2004-08-01 21:59:26 +00002169
Jeff Cody72706ea2014-01-24 09:02:35 -05002170 length = bdrv_getlength(bs);
2171 if (length < 0) {
2172 ret = length;
2173 goto ro_cleanup;
2174 }
2175
2176 backing_length = bdrv_getlength(bs->backing_hd);
2177 if (backing_length < 0) {
2178 ret = backing_length;
2179 goto ro_cleanup;
2180 }
2181
2182 /* If our top snapshot is larger than the backing file image,
2183 * grow the backing file image if possible. If not possible,
2184 * we must return an error */
2185 if (length > backing_length) {
2186 ret = bdrv_truncate(bs->backing_hd, length);
2187 if (ret < 0) {
2188 goto ro_cleanup;
2189 }
2190 }
2191
2192 total_sectors = length >> BDRV_SECTOR_BITS;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002193
2194 /* qemu_try_blockalign() for bs will choose an alignment that works for
2195 * bs->backing_hd as well, so no need to compare the alignment manually. */
2196 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2197 if (buf == NULL) {
2198 ret = -ENOMEM;
2199 goto ro_cleanup;
2200 }
bellardea2384d2004-08-01 21:59:26 +00002201
Kevin Wolf8a426612010-07-16 17:17:01 +02002202 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002203 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2204 if (ret < 0) {
2205 goto ro_cleanup;
2206 }
2207 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002208 ret = bdrv_read(bs, sector, buf, n);
2209 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002210 goto ro_cleanup;
2211 }
2212
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002213 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2214 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002215 goto ro_cleanup;
2216 }
bellardea2384d2004-08-01 21:59:26 +00002217 }
2218 }
bellard95389c82005-12-18 18:28:15 +00002219
Christoph Hellwig1d449522010-01-17 12:32:30 +01002220 if (drv->bdrv_make_empty) {
2221 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002222 if (ret < 0) {
2223 goto ro_cleanup;
2224 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002225 bdrv_flush(bs);
2226 }
bellard95389c82005-12-18 18:28:15 +00002227
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002228 /*
2229 * Make sure all data we wrote to the backing device is actually
2230 * stable on disk.
2231 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002232 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002233 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002234 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002235
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002236 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002237ro_cleanup:
Kevin Wolf857d4f42014-05-20 13:16:51 +02002238 qemu_vfree(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002239
2240 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002241 /* ignoring error return here */
2242 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002243 }
2244
Christoph Hellwig1d449522010-01-17 12:32:30 +01002245 return ret;
bellard33e39632003-07-06 17:15:21 +00002246}
2247
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002248int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002249{
2250 BlockDriverState *bs;
2251
Benoît Canetdc364f42014-01-23 21:31:32 +01002252 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002253 AioContext *aio_context = bdrv_get_aio_context(bs);
2254
2255 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002256 if (bs->drv && bs->backing_hd) {
2257 int ret = bdrv_commit(bs);
2258 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002259 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002260 return ret;
2261 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002262 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002263 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002264 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002265 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002266}
2267
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002268/**
2269 * Remove an active request from the tracked requests list
2270 *
2271 * This function should be called when a tracked request is completing.
2272 */
2273static void tracked_request_end(BdrvTrackedRequest *req)
2274{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002275 if (req->serialising) {
2276 req->bs->serialising_in_flight--;
2277 }
2278
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002279 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002280 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002281}
2282
2283/**
2284 * Add an active request to the tracked requests list
2285 */
2286static void tracked_request_begin(BdrvTrackedRequest *req,
2287 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002288 int64_t offset,
2289 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002290{
2291 *req = (BdrvTrackedRequest){
2292 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002293 .offset = offset,
2294 .bytes = bytes,
2295 .is_write = is_write,
2296 .co = qemu_coroutine_self(),
2297 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002298 .overlap_offset = offset,
2299 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002300 };
2301
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002302 qemu_co_queue_init(&req->wait_queue);
2303
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002304 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2305}
2306
Kevin Wolfe96126f2014-02-08 10:42:18 +01002307static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002308{
Kevin Wolf73271452013-12-04 17:08:50 +01002309 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002310 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2311 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002312
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002313 if (!req->serialising) {
2314 req->bs->serialising_in_flight++;
2315 req->serialising = true;
2316 }
Kevin Wolf73271452013-12-04 17:08:50 +01002317
2318 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2319 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002320}
2321
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002322/**
2323 * Round a region to cluster boundaries
2324 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002325void bdrv_round_to_clusters(BlockDriverState *bs,
2326 int64_t sector_num, int nb_sectors,
2327 int64_t *cluster_sector_num,
2328 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002329{
2330 BlockDriverInfo bdi;
2331
2332 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2333 *cluster_sector_num = sector_num;
2334 *cluster_nb_sectors = nb_sectors;
2335 } else {
2336 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2337 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2338 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2339 nb_sectors, c);
2340 }
2341}
2342
Kevin Wolf73271452013-12-04 17:08:50 +01002343static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002344{
2345 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002346 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002347
Kevin Wolf73271452013-12-04 17:08:50 +01002348 ret = bdrv_get_info(bs, &bdi);
2349 if (ret < 0 || bdi.cluster_size == 0) {
2350 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002351 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002352 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002353 }
2354}
2355
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002356static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002357 int64_t offset, unsigned int bytes)
2358{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002359 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002360 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002361 return false;
2362 }
2363 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002364 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002365 return false;
2366 }
2367 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002368}
2369
/**
 * Block until no serialising request overlaps 'self'.
 *
 * Scans the tracked requests list; whenever an overlapping request (where at
 * least one of the two is serialising) is found, this coroutine queues on it
 * and, once woken, rescans from the start because the list may have changed.
 *
 * Returns true if this coroutine had to wait at least once, false if it
 * could proceed immediately.
 */
static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
    bool retry;
    bool waited = false;

    /* Fast path: nothing serialising in flight means nothing to wait for. */
    if (!bs->serialising_in_flight) {
        return false;
    }

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            /* Only pairs where at least one side is serialising matter. */
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
            }
            if (tracked_request_overlaps(req, self->overlap_offset,
                                         self->overlap_bytes))
            {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
                    qemu_co_queue_wait(&req->wait_queue);
                    self->waiting_for = NULL;
                    /* The list may have changed while we slept: rescan. */
                    retry = true;
                    waited = true;
                    break;
                }
            }
        }
    } while (retry);

    return waited;
}
2413
Kevin Wolf756e6732010-01-12 12:55:17 +01002414/*
2415 * Return values:
2416 * 0 - success
2417 * -EINVAL - backing format specified, but no file
2418 * -ENOSPC - can't update the backing file because no space is left in the
2419 * image file header
2420 * -ENOTSUP - format driver doesn't support changing the backing file
2421 */
2422int bdrv_change_backing_file(BlockDriverState *bs,
2423 const char *backing_file, const char *backing_fmt)
2424{
2425 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002426 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002427
Paolo Bonzini5f377792012-04-12 14:01:01 +02002428 /* Backing file format doesn't make sense without a backing file */
2429 if (backing_fmt && !backing_file) {
2430 return -EINVAL;
2431 }
2432
Kevin Wolf756e6732010-01-12 12:55:17 +01002433 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002434 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002435 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002436 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002437 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002438
2439 if (ret == 0) {
2440 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2441 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2442 }
2443 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002444}
2445
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002446/*
2447 * Finds the image layer in the chain that has 'bs' as its backing file.
2448 *
2449 * active is the current topmost image.
2450 *
2451 * Returns NULL if bs is not found in active's image chain,
2452 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002453 *
2454 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002455 */
2456BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2457 BlockDriverState *bs)
2458{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002459 while (active && bs != active->backing_hd) {
2460 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002461 }
2462
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002463 return active;
2464}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002465
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002466/* Given a BDS, searches for the base layer. */
2467BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2468{
2469 return bdrv_find_overlay(bs, NULL);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002470}
2471
/* Queue element used by bdrv_drop_intermediate() to remember the
 * intermediate images that are scheduled for deletion. */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
} BlkIntermediateStates;
2476
2477
/*
 * Drops images above 'base' up to and including 'top', and sets the image
 * above 'top' to have base as its backing file.
 *
 * Requires that the overlay to 'top' is opened r/w, so that the backing file
 * information in 'bs' can be properly updated.
 *
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * If backing_file_str is non-NULL, it will be used when modifying top's
 * overlay image metadata.
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 * Returns 0 on success, -EIO (or a bdrv_change_backing_file() error) on
 * failure; on failure the chain is left unmodified.
 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base, const char *backing_file_str)
{
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
    BlkIntermediateStates *intermediate_state, *next;
    int ret = -EIO;

    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
    QSIMPLEQ_INIT(&states_to_delete);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    new_top_bs = bdrv_find_overlay(active, top);

    if (new_top_bs == NULL) {
        /* we could not find the image above 'top', this is an error */
        goto exit;
    }

    /* special case of new_top_bs->backing_hd already pointing to base - nothing
     * to do, no intermediate images */
    if (new_top_bs->backing_hd == base) {
        ret = 0;
        goto exit;
    }

    intermediate = top;

    /* now we will go down through the list, and add each BDS we find
     * into our deletion queue, until we hit the 'base'
     */
    while (intermediate) {
        intermediate_state = g_new0(BlkIntermediateStates, 1);
        intermediate_state->bs = intermediate;
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);

        if (intermediate->backing_hd == base) {
            base_bs = intermediate->backing_hd;
            break;
        }
        intermediate = intermediate->backing_hd;
    }
    if (base_bs == NULL) {
        /* something went wrong, we did not end at the base. safely
         * unravel everything, and exit with error */
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
    /* Fall back to the base image's filename when the caller did not supply
     * an explicit backing file string. */
    backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        goto exit;
    }
    bdrv_set_backing_hd(new_top_bs, base_bs);

    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        /* so that bdrv_close() does not recursively close the chain */
        bdrv_set_backing_hd(intermediate_state->bs, NULL);
        bdrv_unref(intermediate_state->bs);
    }
    ret = 0;

exit:
    /* Free the bookkeeping list itself on all paths (success or error). */
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        g_free(intermediate_state);
    }
    return ret;
}
2581
2582
aliguori71d07702009-03-03 17:37:16 +00002583static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2584 size_t size)
2585{
2586 int64_t len;
2587
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002588 if (size > INT_MAX) {
2589 return -EIO;
2590 }
2591
aliguori71d07702009-03-03 17:37:16 +00002592 if (!bdrv_is_inserted(bs))
2593 return -ENOMEDIUM;
2594
2595 if (bs->growable)
2596 return 0;
2597
2598 len = bdrv_getlength(bs);
2599
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002600 if (offset < 0)
2601 return -EIO;
2602
2603 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00002604 return -EIO;
2605
2606 return 0;
2607}
2608
2609static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2610 int nb_sectors)
2611{
Kevin Wolf54db38a2014-04-14 14:47:14 +02002612 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002613 return -EIO;
2614 }
2615
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002616 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2617 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002618}
2619
/* State shared between a synchronous caller and the coroutine that performs
 * the actual I/O on its behalf (see bdrv_prwv_co / bdrv_rw_co_entry). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t offset;          /* byte offset of the request */
    QEMUIOVector *qiov;      /* data buffers; qiov->size is the byte count */
    bool is_write;           /* write if true, read otherwise */
    int ret;                 /* completion status; NOT_DONE while in flight */
    BdrvRequestFlags flags;
} RwCo;
2628
2629static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2630{
2631 RwCo *rwco = opaque;
2632
2633 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002634 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2635 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002636 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002637 } else {
2638 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2639 rwco->qiov->size, rwco->qiov,
2640 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002641 }
2642}
2643
/*
 * Process a vectored synchronous request using coroutines.
 *
 * If called from coroutine context the request runs inline; otherwise a
 * coroutine is spawned and the caller's AioContext is polled until the
 * request completes.  Returns the request's result (< 0 on error).
 */
static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,   /* sentinel: overwritten by bdrv_rw_co_entry() */
        .flags = flags,
    };

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* Drive the event loop until the coroutine has stored its result. */
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }
    return rwco.ret;
}
2686
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002687/*
2688 * Process a synchronous request using coroutines
2689 */
2690static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002691 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002692{
2693 QEMUIOVector qiov;
2694 struct iovec iov = {
2695 .iov_base = (void *)buf,
2696 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2697 };
2698
Kevin Wolfda15ee52014-04-14 15:39:36 +02002699 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2700 return -EINVAL;
2701 }
2702
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002703 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002704 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2705 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002706}
2707
bellard19cb3732006-08-19 11:45:59 +00002708/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002709int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002710 uint8_t *buf, int nb_sectors)
2711{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002712 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002713}
2714
Markus Armbruster07d27a42012-06-29 17:34:29 +02002715/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2716int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2717 uint8_t *buf, int nb_sectors)
2718{
2719 bool enabled;
2720 int ret;
2721
2722 enabled = bs->io_limits_enabled;
2723 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002724 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002725 bs->io_limits_enabled = enabled;
2726 return ret;
2727}
2728
ths5fafdf22007-09-16 21:08:06 +00002729/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002730 -EIO generic I/O error (may happen for all errors)
2731 -ENOMEDIUM No media inserted.
2732 -EINVAL Invalid sector number or nb_sectors
2733 -EACCES Trying to write a read-only device
2734*/
ths5fafdf22007-09-16 21:08:06 +00002735int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002736 const uint8_t *buf, int nb_sectors)
2737{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002738 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002739}
2740
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002741int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2742 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002743{
2744 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002745 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002746}
2747
Peter Lievend75cbb52013-10-24 12:07:03 +02002748/*
2749 * Completely zero out a block device with the help of bdrv_write_zeroes.
2750 * The operation is sped up by checking the block status and only writing
2751 * zeroes to the device if they currently do not return zeroes. Optional
2752 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2753 *
2754 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2755 */
2756int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2757{
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002758 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
Peter Lievend75cbb52013-10-24 12:07:03 +02002759 int n;
2760
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002761 target_sectors = bdrv_nb_sectors(bs);
2762 if (target_sectors < 0) {
2763 return target_sectors;
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002764 }
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002765
Peter Lievend75cbb52013-10-24 12:07:03 +02002766 for (;;) {
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002767 nb_sectors = target_sectors - sector_num;
Peter Lievend75cbb52013-10-24 12:07:03 +02002768 if (nb_sectors <= 0) {
2769 return 0;
2770 }
2771 if (nb_sectors > INT_MAX) {
2772 nb_sectors = INT_MAX;
2773 }
2774 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002775 if (ret < 0) {
2776 error_report("error getting block status at sector %" PRId64 ": %s",
2777 sector_num, strerror(-ret));
2778 return ret;
2779 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002780 if (ret & BDRV_BLOCK_ZERO) {
2781 sector_num += n;
2782 continue;
2783 }
2784 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2785 if (ret < 0) {
2786 error_report("error writing zeroes at sector %" PRId64 ": %s",
2787 sector_num, strerror(-ret));
2788 return ret;
2789 }
2790 sector_num += n;
2791 }
2792}
2793
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002794int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002795{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002796 QEMUIOVector qiov;
2797 struct iovec iov = {
2798 .iov_base = (void *)buf,
2799 .iov_len = bytes,
2800 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002801 int ret;
bellard83f64092006-08-01 16:21:11 +00002802
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002803 if (bytes < 0) {
2804 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002805 }
2806
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002807 qemu_iovec_init_external(&qiov, &iov, 1);
2808 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2809 if (ret < 0) {
2810 return ret;
bellard83f64092006-08-01 16:21:11 +00002811 }
2812
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002813 return bytes;
bellard83f64092006-08-01 16:21:11 +00002814}
2815
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002816int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002817{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002818 int ret;
bellard83f64092006-08-01 16:21:11 +00002819
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002820 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2821 if (ret < 0) {
2822 return ret;
bellard83f64092006-08-01 16:21:11 +00002823 }
2824
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002825 return qiov->size;
2826}
2827
2828int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002829 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002830{
2831 QEMUIOVector qiov;
2832 struct iovec iov = {
2833 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002834 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002835 };
2836
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002837 if (bytes < 0) {
2838 return -EINVAL;
2839 }
2840
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002841 qemu_iovec_init_external(&qiov, &iov, 1);
2842 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002843}
bellard83f64092006-08-01 16:21:11 +00002844
Kevin Wolff08145f2010-06-16 16:38:15 +02002845/*
2846 * Writes to the file and ensures that no writes are reordered across this
2847 * request (acts as a barrier)
2848 *
2849 * Returns 0 on success, -errno in error cases.
2850 */
2851int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2852 const void *buf, int count)
2853{
2854 int ret;
2855
2856 ret = bdrv_pwrite(bs, offset, buf, count);
2857 if (ret < 0) {
2858 return ret;
2859 }
2860
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002861 /* No flush needed for cache modes that already do it */
2862 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002863 bdrv_flush(bs);
2864 }
2865
2866 return 0;
2867}
2868
/*
 * Copy-on-read helper: read a sector range from the backing chain and write
 * it back into the top image, then copy the requested subrange into 'qiov'.
 *
 * Returns 0 (or the driver's non-negative result) on success, -ENOMEM if the
 * bounce buffer cannot be allocated, or a negative errno from the driver's
 * read/write callbacks.
 */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file. This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
                           &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
    if (bounce_buffer == NULL) {
        /* qemu_vfree(NULL) at 'err' is a no-op, so jumping there is safe. */
        ret = -ENOMEM;
        goto err;
    }

    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    /* If the whole cluster read back as zeroes and the driver can write
     * zeroes efficiently, avoid writing the bounce buffer out. */
    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors, 0);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests. If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* Copy only the originally-requested subrange of the cluster out to the
     * caller's iovec. */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
                        nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
2939
/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read and zeroing after EOF; any other features must be
 * implemented by the caller.
 *
 * @req     tracked request, already begun by the caller
 * @offset  byte offset, must be aligned to BDRV_SECTOR_SIZE
 * @bytes   byte count, must be aligned to BDRV_SECTOR_SIZE
 * @align   request alignment used to round the EOF boundary
 *
 * Returns 0 on success, negative errno on failure.
 */
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    /* Block until overlapping serialising requests (e.g. other CoR writes)
     * have drained; must happen before we look at allocation state. */
    wait_serialising_requests(req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        /* Any unallocated part forces the whole request through the
         * copy-on-read slow path. */
        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    /* Forward the request to the BlockDriver */
    if (!(bs->zero_beyond_eof && bs->growable)) {
        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
    } else {
        /* Read zeros after EOF of growable BDSes */
        int64_t total_sectors, max_nb_sectors;

        total_sectors = bdrv_nb_sectors(bs);
        if (total_sectors < 0) {
            ret = total_sectors;
            goto out;
        }

        /* Sectors the driver can actually serve, rounded up to the request
         * alignment so the driver still sees an aligned boundary. */
        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                  align >> BDRV_SECTOR_BITS);
        if (max_nb_sectors > 0) {
            QEMUIOVector local_qiov;
            size_t local_sectors;

            /* NOTE(review): the divisor looks like it should be
             * BDRV_SECTOR_SIZE rather than BDRV_SECTOR_BITS to bound the
             * sectors->bytes conversion below — confirm against upstream. */
            max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
            local_sectors = MIN(max_nb_sectors, nb_sectors);

            /* Read only the in-bounds prefix through a shortened copy of
             * the caller's qiov. */
            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, 0,
                              local_sectors * BDRV_SECTOR_SIZE);

            ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
                                     &local_qiov);

            qemu_iovec_destroy(&local_qiov);
        } else {
            ret = 0;
        }

        /* Reading beyond end of file is supposed to produce zeroes */
        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
            uint64_t offset = MAX(0, total_sectors - sector_num);
            uint64_t bytes = (sector_num + nb_sectors - offset) *
                              BDRV_SECTOR_SIZE;
            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
        }
    }

out:
    return ret;
}
3031
/*
 * Handle a read request in coroutine context.
 *
 * Takes a byte-granularity request, applies copy-on-read and I/O throttling
 * policy, pads it out to the device's request alignment with bounce buffers
 * if needed, and forwards it to bdrv_aligned_preadv() under a tracked
 * request.  Returns 0 on success, negative errno on failure.
 */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_byte_request(bs, offset, bytes)) {
        return -EIO;
    }

    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* throttling disk I/O; may yield the coroutine until budget is free */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, false);
    }

    /* Align read if necessary by padding qiov with a bounce buffer in front
     * of the caller's buffers; the extra head bytes are simply discarded. */
    if (offset & (align - 1)) {
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    /* Same for a partial tail: append a bounce buffer and round up. */
    if ((offset + bytes) & (align - 1)) {
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    /* The request must be tracked for the duration of the aligned read so
     * that overlap detection by other requests sees it. */
    tracked_request_begin(&req, bs, offset, bytes, false);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);

    /* head_buf/tail_buf are only allocated when use_local_qiov is set */
    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}
3105
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003106static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3107 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3108 BdrvRequestFlags flags)
3109{
3110 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3111 return -EINVAL;
3112 }
3113
3114 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3115 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3116}
3117
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003118int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003119 int nb_sectors, QEMUIOVector *qiov)
3120{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003121 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003122
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003123 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3124}
3125
3126int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3127 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3128{
3129 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3130
3131 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3132 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003133}
3134
Peter Lievenc31cb702013-10-24 12:06:58 +02003135/* if no limit is specified in the BlockLimits use a default
3136 * of 32768 512-byte sectors (16 MiB) per request.
3137 */
3138#define MAX_WRITE_ZEROES_DEFAULT 32768
3139
/*
 * Write zeroes to [sector_num, sector_num + nb_sectors), splitting the range
 * into chunks that honour the driver's write_zeroes alignment and maximum
 * request size.  Tries the driver's efficient write-zeroes callback first
 * and falls back to writing a zeroed bounce buffer if unsupported.
 *
 * Returns 0 on success, negative errno on the first failing chunk.
 */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    /* iov doubles as the "bounce buffer allocated" flag: iov_base stays
     * NULL until the fallback path first needs it */
    struct iovec iov = {0};
    int ret = 0;

    int max_write_zeroes = bs->bl.max_write_zeroes ?
                           bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;

    /* Loop ends on error (ret != 0) or when the whole range is written */
    while (nb_sectors > 0 && !ret) {
        int num = nb_sectors;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned.
         */
        if (bs->bl.write_zeroes_alignment
            && num > bs->bl.write_zeroes_alignment) {
            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
                /* Make a small request up to the first aligned sector. */
                num = bs->bl.write_zeroes_alignment;
                num -= sector_num % bs->bl.write_zeroes_alignment;
            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
                /* Shorten the request to the last aligned sector.  num cannot
                 * underflow because num > bs->bl.write_zeroes_alignment.
                 */
                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
            }
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_write_zeroes) {
            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            iov.iov_len = num * BDRV_SECTOR_SIZE;
            if (iov.iov_base == NULL) {
                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
                if (iov.iov_base == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);

            /* Keep bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_write_zeroes) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        sector_num += num;
        nb_sectors -= num;
    }

fail:
    /* qemu_vfree(NULL) is a safe no-op when no bounce buffer was used */
    qemu_vfree(iov.iov_base);
    return ret;
}
3214
/*
 * Forwards an already correctly aligned write request to the BlockDriver.
 *
 * Runs the before-write notifiers, optionally converts all-zero payloads
 * into a write-zeroes operation (zero detection), performs the write,
 * flushes in writethrough mode, and updates dirty bitmaps and accounting.
 * The caller owns the tracked request @req and its begin/end lifetime.
 */
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* If we had to wait here, someone else marked us serialising in the
     * meantime -- that must not happen for our own serialising request. */
    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

    /* Zero detection: turn an all-zero payload into a write-zeroes request
     * when the driver supports it and the policy allows it. */
    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
        qemu_iovec_is_zero(qiov)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
    } else {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }
    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);

    /* In writethrough mode the data must hit stable storage before we
     * report completion. */
    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
    }

    bdrv_set_dirty(bs, sector_num, nb_sectors);

    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);

    /* A successful write past the old end grows a growable image. */
    if (bs->growable && ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
    }

    return ret;
}
3274
/*
 * Handle a write request in coroutine context.
 *
 * Takes a byte-granularity request, applies throttling, and — if the request
 * is not aligned to the device's request alignment — performs a
 * read-modify-write cycle: the unaligned head and/or tail sectors are read
 * into bounce buffers, merged with the caller's data, and the padded request
 * is handed to bdrv_aligned_pwritev().  Returns 0 or negative errno.
 */
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BdrvTrackedRequest req;
    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_byte_request(bs, offset, bytes)) {
        return -EIO;
    }

    /* throttling disk I/O; may yield the coroutine until budget is free */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, true);
    }

    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, true);

    if (offset & (align - 1)) {
        QEMUIOVector head_qiov;
        struct iovec head_iov;

        /* The RMW read must not race with overlapping writes */
        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base = head_buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        /* Prepend the read head bytes to the caller's data */
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        /* If we already padded the head, nobody may have been able to
         * overlap us in the meantime */
        waited = wait_serialising_requests(&req);
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base = tail_buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                  align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        /* Append only the read tail bytes that lie past the caller's data */
        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
    }

    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

fail:
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    /* Both are NULL when no padding was needed; qemu_vfree(NULL) is safe */
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);

    return ret;
}
3396
Kevin Wolf66015532013-12-03 14:40:18 +01003397static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3398 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3399 BdrvRequestFlags flags)
3400{
3401 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3402 return -EINVAL;
3403 }
3404
3405 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3406 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3407}
3408
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003409int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3410 int nb_sectors, QEMUIOVector *qiov)
3411{
3412 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3413
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003414 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3415}
3416
3417int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003418 int64_t sector_num, int nb_sectors,
3419 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003420{
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003421 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003422
Peter Lievend32f35c2013-10-24 12:06:52 +02003423 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3424 flags &= ~BDRV_REQ_MAY_UNMAP;
3425 }
3426
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003427 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003428 BDRV_REQ_ZERO_WRITE | flags);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003429}
3430
bellard83f64092006-08-01 16:21:11 +00003431/**
bellard83f64092006-08-01 16:21:11 +00003432 * Truncate file to 'offset' bytes (needed only for file protocols)
3433 */
3434int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3435{
3436 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003437 int ret;
bellard83f64092006-08-01 16:21:11 +00003438 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003439 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003440 if (!drv->bdrv_truncate)
3441 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003442 if (bs->read_only)
3443 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003444
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003445 ret = drv->bdrv_truncate(bs, offset);
3446 if (ret == 0) {
3447 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003448 if (bs->blk) {
3449 blk_dev_resize_cb(bs->blk);
3450 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003451 }
3452 return ret;
bellard83f64092006-08-01 16:21:11 +00003453}
3454
3455/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003456 * Length of a allocated file in bytes. Sparse files are counted by actual
3457 * allocated space. Return < 0 if error or unknown.
3458 */
3459int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3460{
3461 BlockDriver *drv = bs->drv;
3462 if (!drv) {
3463 return -ENOMEDIUM;
3464 }
3465 if (drv->bdrv_get_allocated_file_size) {
3466 return drv->bdrv_get_allocated_file_size(bs);
3467 }
3468 if (bs->file) {
3469 return bdrv_get_allocated_file_size(bs->file);
3470 }
3471 return -ENOTSUP;
3472}
3473
3474/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003475 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003476 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003477int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003478{
3479 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003480
bellard83f64092006-08-01 16:21:11 +00003481 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003482 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003483
Kevin Wolfb94a2612013-10-29 12:18:58 +01003484 if (drv->has_variable_length) {
3485 int ret = refresh_total_sectors(bs, bs->total_sectors);
3486 if (ret < 0) {
3487 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003488 }
bellard83f64092006-08-01 16:21:11 +00003489 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003490 return bs->total_sectors;
3491}
3492
3493/**
3494 * Return length in bytes on success, -errno on error.
3495 * The length is always a multiple of BDRV_SECTOR_SIZE.
3496 */
3497int64_t bdrv_getlength(BlockDriverState *bs)
3498{
3499 int64_t ret = bdrv_nb_sectors(bs);
3500
3501 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003502}
3503
bellard19cb3732006-08-19 11:45:59 +00003504/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003505void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003506{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003507 int64_t nb_sectors = bdrv_nb_sectors(bs);
3508
3509 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003510}
bellardcf989512004-02-16 21:56:36 +00003511
/* Install the error-handling policies consulted by bdrv_get_error_action()
 * for guest read and write errors on this device. */
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
                       BlockdevOnError on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}
3518
/* Return the configured error policy for the given direction. */
BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}
3523
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003524BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3525{
3526 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3527
3528 switch (on_err) {
3529 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003530 return (error == ENOSPC) ?
3531 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003532 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003533 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003534 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003535 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003536 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003537 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003538 default:
3539 abort();
3540 }
3541}
3542
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003543static void send_qmp_error_event(BlockDriverState *bs,
3544 BlockErrorAction action,
3545 bool is_read, int error)
3546{
3547 BlockErrorAction ac;
3548
3549 ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3550 qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action,
3551 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003552 error == ENOSPC, strerror(error),
3553 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003554}
3555
/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 *
 * @action must be the value previously obtained from
 * bdrv_get_error_action(); @error is the positive errno value.
 * The statement order below is deliberate — see the inline comments.
 */
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
                       bool is_read, int error)
{
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        bdrv_iostatus_set_err(bs, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects.  First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop.  In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(bs, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(bs, action, is_read, error);
    }
}
3587
/* Return non-zero if the device was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
3592
/* Return non-zero if this is a SCSI generic (pass-through) device. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
3597
/* Return non-zero if the write cache is enabled (i.e. writeback mode). */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
3602
/* Switch between writeback (wce == true) and writethrough mode.  The
 * setting is mirrored into open_flags so it survives bdrv_reopen(). */
void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
{
    bs->enable_write_cache = wce;

    /* so a reopen() will preserve wce */
    if (wce) {
        bs->open_flags |= BDRV_O_CACHE_WB;
    } else {
        bs->open_flags &= ~BDRV_O_CACHE_WB;
    }
}
3614
bellardea2384d2004-08-01 21:59:26 +00003615int bdrv_is_encrypted(BlockDriverState *bs)
3616{
3617 if (bs->backing_hd && bs->backing_hd->encrypted)
3618 return 1;
3619 return bs->encrypted;
3620}
3621
aliguoric0f4ce72009-03-05 23:01:01 +00003622int bdrv_key_required(BlockDriverState *bs)
3623{
3624 BlockDriverState *backing_hd = bs->backing_hd;
3625
3626 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3627 return 1;
3628 return (bs->encrypted && !bs->valid_key);
3629}
3630
/* Supply the encryption key for @bs and, first, for its encrypted backing
 * file (the same key is tried on both).  On first success the deferred
 * media-change callback is fired.  Returns 0 or negative errno. */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        /* unlock the backing file first; if this image itself is not
         * encrypted, that is all that was needed */
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        /* transition from "locked" to "usable" */
        bs->valid_key = 1;
        if (bs->blk) {
            /* call the change callback now, we skipped it on open */
            blk_dev_change_media_cb(bs->blk, true);
        }
    }
    return ret;
}
3658
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003659const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003660{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003661 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003662}
3663
/* qsort() comparator for an array of strings (const char *).
 *
 * qsort() hands the comparator pointers to the array *elements*, i.e.
 * const char ** here; the previous code passed them straight to strcmp()
 * and therefore compared the pointer bytes, not the strings.  Dereference
 * before comparing. */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(const char *const *)a, *(const char *const *)b);
}
3668
ths5fafdf22007-09-16 21:08:06 +00003669void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003670 void *opaque)
3671{
3672 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003673 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003674 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003675 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003676
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003677 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003678 if (drv->format_name) {
3679 bool found = false;
3680 int i = count;
3681 while (formats && i && !found) {
3682 found = !strcmp(formats[--i], drv->format_name);
3683 }
3684
3685 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003686 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003687 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003688 }
3689 }
bellardea2384d2004-08-01 21:59:26 +00003690 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003691
3692 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3693
3694 for (i = 0; i < count; i++) {
3695 it(opaque, formats[i]);
3696 }
3697
Jeff Codye855e4f2014-04-28 18:29:54 -04003698 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003699}
3700
Benoît Canetdc364f42014-01-23 21:31:32 +01003701/* This function is to find block backend bs */
Markus Armbruster7f06d472014-10-07 13:59:12 +02003702/* TODO convert callers to blk_by_name(), then remove */
bellardb3380822004-03-14 21:38:54 +00003703BlockDriverState *bdrv_find(const char *name)
3704{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003705 BlockBackend *blk = blk_by_name(name);
bellardb3380822004-03-14 21:38:54 +00003706
Markus Armbruster7f06d472014-10-07 13:59:12 +02003707 return blk ? blk_bs(blk) : NULL;
bellardb3380822004-03-14 21:38:54 +00003708}
3709
Benoît Canetdc364f42014-01-23 21:31:32 +01003710/* This function is to find a node in the bs graph */
3711BlockDriverState *bdrv_find_node(const char *node_name)
3712{
3713 BlockDriverState *bs;
3714
3715 assert(node_name);
3716
3717 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3718 if (!strcmp(node_name, bs->node_name)) {
3719 return bs;
3720 }
3721 }
3722 return NULL;
3723}
3724
Benoît Canetc13163f2014-01-23 21:31:34 +01003725/* Put this QMP function here so it can access the static graph_bdrv_states. */
3726BlockDeviceInfoList *bdrv_named_nodes_list(void)
3727{
3728 BlockDeviceInfoList *list, *entry;
3729 BlockDriverState *bs;
3730
3731 list = NULL;
3732 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3733 entry = g_malloc0(sizeof(*entry));
3734 entry->value = bdrv_block_device_info(bs);
3735 entry->next = list;
3736 list = entry;
3737 }
3738
3739 return list;
3740}
3741
Benoît Canet12d3ba82014-01-23 21:31:35 +01003742BlockDriverState *bdrv_lookup_bs(const char *device,
3743 const char *node_name,
3744 Error **errp)
3745{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003746 BlockBackend *blk;
3747 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003748
Benoît Canet12d3ba82014-01-23 21:31:35 +01003749 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02003750 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003751
Markus Armbruster7f06d472014-10-07 13:59:12 +02003752 if (blk) {
3753 return blk_bs(blk);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003754 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003755 }
3756
Benoît Canetdd67fa52014-02-12 17:15:06 +01003757 if (node_name) {
3758 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003759
Benoît Canetdd67fa52014-02-12 17:15:06 +01003760 if (bs) {
3761 return bs;
3762 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003763 }
3764
Benoît Canetdd67fa52014-02-12 17:15:06 +01003765 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3766 device ? device : "",
3767 node_name ? node_name : "");
3768 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003769}
3770
Jeff Cody5a6684d2014-06-25 15:40:09 -04003771/* If 'base' is in the same chain as 'top', return true. Otherwise,
3772 * return false. If either argument is NULL, return false. */
3773bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3774{
3775 while (top && top != base) {
3776 top = top->backing_hd;
3777 }
3778
3779 return top != NULL;
3780}
3781
Markus Armbruster2f399b02010-06-02 18:55:20 +02003782BlockDriverState *bdrv_next(BlockDriverState *bs)
3783{
3784 if (!bs) {
3785 return QTAILQ_FIRST(&bdrv_states);
3786 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003787 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003788}
3789
Markus Armbruster7f06d472014-10-07 13:59:12 +02003790/* TODO check what callers really want: bs->node_name or blk_name() */
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003791const char *bdrv_get_device_name(const BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003792{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003793 return bs->blk ? blk_name(bs->blk) : "";
bellardea2384d2004-08-01 21:59:26 +00003794}
3795
/* Return the BDRV_O_* flags @bs is currently open with. */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}
3800
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003801int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003802{
3803 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003804 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003805
Benoît Canetdc364f42014-01-23 21:31:32 +01003806 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003807 AioContext *aio_context = bdrv_get_aio_context(bs);
3808 int ret;
3809
3810 aio_context_acquire(aio_context);
3811 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003812 if (ret < 0 && !result) {
3813 result = ret;
3814 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003815 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003816 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003817
3818 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003819}
3820
/* Helper for drivers whose freshly created images always read back as
 * zeroes: unconditionally reports zero initialization. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
3825
Kevin Wolff2feebb2010-04-14 17:30:35 +02003826int bdrv_has_zero_init(BlockDriverState *bs)
3827{
3828 assert(bs->drv);
3829
Paolo Bonzini11212d82013-09-04 19:00:27 +02003830 /* If BS is a copy on write image, it is initialized to
3831 the contents of the base image, which may not be zeroes. */
3832 if (bs->backing_hd) {
3833 return 0;
3834 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003835 if (bs->drv->bdrv_has_zero_init) {
3836 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003837 }
3838
Peter Lieven3ac21622013-06-28 12:47:42 +02003839 /* safe default */
3840 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02003841}
3842
Peter Lieven4ce78692013-10-24 12:06:54 +02003843bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3844{
3845 BlockDriverInfo bdi;
3846
3847 if (bs->backing_hd) {
3848 return false;
3849 }
3850
3851 if (bdrv_get_info(bs, &bdi) == 0) {
3852 return bdi.unallocated_blocks_are_zero;
3853 }
3854
3855 return false;
3856}
3857
3858bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3859{
3860 BlockDriverInfo bdi;
3861
3862 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3863 return false;
3864 }
3865
3866 if (bdrv_get_info(bs, &bdi) == 0) {
3867 return bdi.can_write_zeroes_with_unmap;
3868 }
3869
3870 return false;
3871}
3872
/* Parameter/result bundle passed from the synchronous
 * bdrv_get_block_status() wrapper to its coroutine entry point. */
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;       /* node to query */
    BlockDriverState *base;     /* NOTE(review): not set anywhere visible
                                 * in this path — possibly vestigial */
    int64_t sector_num;         /* first sector of the query */
    int nb_sectors;             /* upper bound for *pnum */
    int *pnum;                  /* out: length of the uniform run */
    int64_t ret;                /* out: BDRV_BLOCK_* bits or -errno */
    bool done;                  /* out: set when the coroutine finished */
} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003882
thsf58c7b32008-06-05 21:53:49 +00003883/*
3884 * Returns true iff the specified sector is present in the disk image. Drivers
3885 * not implementing the functionality are assumed to not support backing files,
3886 * hence all their sectors are reported as allocated.
3887 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003888 * If 'sector_num' is beyond the end of the disk image the return value is 0
3889 * and 'pnum' is set to 0.
3890 *
thsf58c7b32008-06-05 21:53:49 +00003891 * 'pnum' is set to the number of sectors (including and immediately following
3892 * the specified sector) that are known to be in the same
3893 * allocated/unallocated state.
3894 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003895 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
3896 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00003897 */
/* Coroutine implementation of bdrv_get_block_status(); see the comment
 * above for the external contract.  Returns BDRV_BLOCK_* status bits
 * (possibly with an encoded host offset) or a negative errno. */
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
                                                     int64_t sector_num,
                                                     int nb_sectors, int *pnum)
{
    int64_t total_sectors;
    int64_t n;
    int64_t ret, ret2;

    total_sectors = bdrv_nb_sectors(bs);
    if (total_sectors < 0) {
        return total_sectors;
    }

    /* query starts beyond EOF: report an empty run */
    if (sector_num >= total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* clamp the request to the end of the image */
    n = total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    /* Drivers without the hook are assumed fully allocated; protocol
     * drivers can additionally report a valid host offset. */
    if (!bs->drv->bdrv_co_get_block_status) {
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
        }
        return ret;
    }

    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret < 0) {
        *pnum = 0;
        return ret;
    }

    /* RAW means "ask the underlying file at the reported offset" */
    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                     *pnum, pnum);
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    }

    /* Unallocated in this layer: it may still be known-zero, either by
     * driver guarantee or because it lies past the backing file's EOF. */
    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing_hd) {
            BlockDriverState *bs2 = bs->backing_hd;
            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    /* Refine DATA runs with the protocol layer's view of the same range */
    if (bs->file &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int file_pnum;

        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                        *pnum, &file_pnum);
        if (ret2 >= 0) {
            /* Ignore errors. This is just providing extra information, it
             * is useful but not necessary.
             */
            if (!file_pnum) {
                /* !file_pnum indicates an offset at or beyond the EOF; it is
                 * perfectly valid for the format block driver to point to such
                 * offsets, so catch it and mark everything as zero */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

    return ret;
}
3984
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003985/* Coroutine wrapper for bdrv_get_block_status() */
3986static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003987{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003988 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003989 BlockDriverState *bs = data->bs;
3990
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003991 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
3992 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003993 data->done = true;
3994}
3995
3996/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003997 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003998 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003999 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004000 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004001int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4002 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004003{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004004 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004005 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004006 .bs = bs,
4007 .sector_num = sector_num,
4008 .nb_sectors = nb_sectors,
4009 .pnum = pnum,
4010 .done = false,
4011 };
4012
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004013 if (qemu_in_coroutine()) {
4014 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004015 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004016 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004017 AioContext *aio_context = bdrv_get_aio_context(bs);
4018
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004019 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004020 qemu_coroutine_enter(co, &data);
4021 while (!data.done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004022 aio_poll(aio_context, true);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004023 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004024 }
4025 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00004026}
4027
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004028int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4029 int nb_sectors, int *pnum)
4030{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004031 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4032 if (ret < 0) {
4033 return ret;
4034 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004035 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004036}
4037
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004038/*
4039 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4040 *
4041 * Return true if the given sector is allocated in any image between
4042 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4043 * sector is allocated in any image of the chain. Return false otherwise.
4044 *
4045 * 'pnum' is set to the number of sectors (including and immediately following
4046 * the specified sector) that are known to be in the same
4047 * allocated/unallocated state.
4048 *
4049 */
/* See the contract in the comment block above.  Walks the chain from
 * @top down to (but excluding) @base, querying each layer in turn. */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t sector_num,
                            int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    int ret, n = nb_sectors;

    intermediate = top;
    while (intermediate && intermediate != base) {
        int pnum_inter;
        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
                                &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            /* allocated in this layer: report its run length */
            *pnum = pnum_inter;
            return 1;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nr_sectors] allocated.
         */
        /* Shrink the reported unallocated run, but only when the layer's
         * answer is not merely truncated by its own (shorter) EOF. */
        if (n > pnum_inter &&
            (intermediate == top ||
             sector_num + pnum_inter < intermediate->total_sectors)) {
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    /* unallocated in every layer between top and base */
    *pnum = n;
    return 0;
}
4088
aliguori045df332009-03-05 23:00:48 +00004089const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4090{
4091 if (bs->backing_hd && bs->backing_hd->encrypted)
4092 return bs->backing_file;
4093 else if (bs->encrypted)
4094 return bs->filename;
4095 else
4096 return NULL;
4097}
4098
/* Copy @bs's backing file name into @filename, truncated to
 * @filename_size bytes (always NUL-terminated by pstrcpy). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
4104
ths5fafdf22007-09-16 21:08:06 +00004105int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00004106 const uint8_t *buf, int nb_sectors)
4107{
4108 BlockDriver *drv = bs->drv;
4109 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004110 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004111 if (!drv->bdrv_write_compressed)
4112 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02004113 if (bdrv_check_request(bs, sector_num, nb_sectors))
4114 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004115
Fam Zhenge4654d22013-11-13 18:29:43 +08004116 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004117
bellardfaea38e2006-08-05 21:31:00 +00004118 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4119}
ths3b46e622007-09-17 08:09:54 +00004120
bellardfaea38e2006-08-05 21:31:00 +00004121int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4122{
4123 BlockDriver *drv = bs->drv;
4124 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004125 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004126 if (!drv->bdrv_get_info)
4127 return -ENOTSUP;
4128 memset(bdi, 0, sizeof(*bdi));
4129 return drv->bdrv_get_info(bs, bdi);
4130}
4131
Max Reitzeae041f2013-10-09 10:46:16 +02004132ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4133{
4134 BlockDriver *drv = bs->drv;
4135 if (drv && drv->bdrv_get_specific_info) {
4136 return drv->bdrv_get_specific_info(bs);
4137 }
4138 return NULL;
4139}
4140
Christoph Hellwig45566e92009-07-10 23:11:57 +02004141int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4142 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004143{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004144 QEMUIOVector qiov;
4145 struct iovec iov = {
4146 .iov_base = (void *) buf,
4147 .iov_len = size,
4148 };
4149
4150 qemu_iovec_init_external(&qiov, &iov, 1);
4151 return bdrv_writev_vmstate(bs, &qiov, pos);
4152}
4153
4154int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4155{
aliguori178e08a2009-04-05 19:10:55 +00004156 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004157
4158 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004159 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004160 } else if (drv->bdrv_save_vmstate) {
4161 return drv->bdrv_save_vmstate(bs, qiov, pos);
4162 } else if (bs->file) {
4163 return bdrv_writev_vmstate(bs->file, qiov, pos);
4164 }
4165
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004166 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004167}
4168
Christoph Hellwig45566e92009-07-10 23:11:57 +02004169int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4170 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004171{
4172 BlockDriver *drv = bs->drv;
4173 if (!drv)
4174 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004175 if (drv->bdrv_load_vmstate)
4176 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4177 if (bs->file)
4178 return bdrv_load_vmstate(bs->file, buf, pos, size);
4179 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004180}
4181
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004182void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4183{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004184 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004185 return;
4186 }
4187
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004188 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004189}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004190
Kevin Wolf41c695c2012-12-06 14:32:58 +01004191int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4192 const char *tag)
4193{
4194 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4195 bs = bs->file;
4196 }
4197
4198 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4199 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4200 }
4201
4202 return -ENOTSUP;
4203}
4204
Fam Zheng4cc70e92013-11-20 10:01:54 +08004205int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4206{
4207 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4208 bs = bs->file;
4209 }
4210
4211 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4212 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4213 }
4214
4215 return -ENOTSUP;
4216}
4217
Kevin Wolf41c695c2012-12-06 14:32:58 +01004218int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4219{
Max Reitz938789e2014-03-10 23:44:08 +01004220 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004221 bs = bs->file;
4222 }
4223
4224 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4225 return bs->drv->bdrv_debug_resume(bs, tag);
4226 }
4227
4228 return -ENOTSUP;
4229}
4230
4231bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4232{
4233 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4234 bs = bs->file;
4235 }
4236
4237 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4238 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4239 }
4240
4241 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004242}
4243
/* Return non-zero if @bs was opened with BDRV_O_SNAPSHOT. */
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
}
4248
Jeff Codyb1b1d782012-10-16 15:49:09 -04004249/* backing_file can either be relative, or absolute, or a protocol. If it is
4250 * relative, it must be relative to the chain. So, passing in bs->filename
4251 * from a BDS as backing_file should not be done, as that may be relative to
4252 * the CWD rather than the chain. */
/* See the contract in the comment above: walk @bs's backing chain and
 * return the backing BDS whose filename matches @backing_file, or NULL.
 * Protocol names are compared verbatim; plain paths are canonicalized
 * with realpath() relative to each image's own filename first. */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp      = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {
                /* unresolvable path (e.g. missing file): try the next layer */
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    return retval;
}
4314
Benoît Canetf198fd12012-08-02 10:22:47 +02004315int bdrv_get_backing_file_depth(BlockDriverState *bs)
4316{
4317 if (!bs->drv) {
4318 return 0;
4319 }
4320
4321 if (!bs->backing_hd) {
4322 return 0;
4323 }
4324
4325 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4326}
4327
bellard83f64092006-08-01 16:21:11 +00004328/**************************************************************/
4329/* async I/Os */
4330
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004331BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4332 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004333 BlockCompletionFunc *cb, void *opaque)
aliguori3b69e4b2009-01-22 16:59:24 +00004334{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004335 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4336
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004337 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004338 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004339}
4340
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004341BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4342 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004343 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004344{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004345 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4346
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004347 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004348 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004349}
4350
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004351BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004352 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004353 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004354{
4355 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4356
4357 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4358 BDRV_REQ_ZERO_WRITE | flags,
4359 cb, opaque, true);
4360}
4361
Kevin Wolf40b4f532009-09-09 17:53:37 +02004362
/* Completion bookkeeping for a batch of merged writes: one MultiwriteCB
 * tracks the in-flight sub-requests and fans completion back out to the
 * original per-request callbacks. */
typedef struct MultiwriteCB {
    int error;                     /* first error seen, or 0 */
    int num_requests;              /* sub-requests still in flight */
    int num_callbacks;             /* caller callbacks to invoke */
    struct {
        BlockCompletionFunc *cb;
        void *opaque;
        QEMUIOVector *free_qiov;   /* merged qiov to destroy, or NULL */
    } callbacks[];
} MultiwriteCB;
4374static void multiwrite_user_cb(MultiwriteCB *mcb)
4375{
4376 int i;
4377
4378 for (i = 0; i < mcb->num_callbacks; i++) {
4379 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004380 if (mcb->callbacks[i].free_qiov) {
4381 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4382 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004383 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004384 }
4385}
4386
4387static void multiwrite_cb(void *opaque, int ret)
4388{
4389 MultiwriteCB *mcb = opaque;
4390
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004391 trace_multiwrite_cb(mcb, ret);
4392
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004393 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004394 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004395 }
4396
4397 mcb->num_requests--;
4398 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004399 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004400 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004401 }
4402}
4403
4404static int multiwrite_req_compare(const void *a, const void *b)
4405{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004406 const BlockRequest *req1 = a, *req2 = b;
4407
4408 /*
4409 * Note that we can't simply subtract req2->sector from req1->sector
4410 * here as that could overflow the return value.
4411 */
4412 if (req1->sector > req2->sector) {
4413 return 1;
4414 } else if (req1->sector < req2->sector) {
4415 return -1;
4416 } else {
4417 return 0;
4418 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004419}
4420
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 *
 * Merged requests get a freshly allocated QEMUIOVector; ownership of that
 * vector is handed to mcb->callbacks[i].free_qiov so multiwrite_user_cb()
 * can release it when the batch completes.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                            int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // Don't merge if the combined vector would exceed IOV_MAX entries
        // (the +1 leaves room for the possible tail segment added below).
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                    reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);

            // We shouldn't need to add any zeros between the two requests:
            // the merge condition above guarantees there is no gap.
            assert (reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);

            // Add tail of first request, if necessary
            if (qiov->size < reqs[outidx].qiov->size) {
                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
                                  reqs[outidx].qiov->size - qiov->size);
            }

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Record the merged vector so it is freed when the batch is done.
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
4486
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure; it is freed by multiwrite_cb() once the
    // last request of the batch has completed.
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    // Save the per-request callbacks before merging may rewrite reqs[].
    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
                              reqs[i].nb_sectors, reqs[i].flags,
                              multiwrite_cb, mcb,
                              true);
    }

    return 0;
}
4544
/*
 * Synchronously cancel an AIO request: request async cancellation, then
 * poll the request's AioContext until our temporary reference is the only
 * one left, i.e. the request has completed (possibly normally if the
 * driver does not implement cancel_async).
 */
void bdrv_aio_cancel(BlockAIOCB *acb)
{
    /* Keep the ACB alive while we poll for completion. */
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            /* No way to find the event loop to poll for completion. */
            abort();
        }
    }
    qemu_aio_unref(acb);
}
4560
4561/* Async version of aio cancel. The caller is not blocked if the acb implements
4562 * cancel_async, otherwise we do nothing and let the request normally complete.
4563 * In either case the completion callback must be called. */
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004564void bdrv_aio_cancel_async(BlockAIOCB *acb)
Fam Zheng02c50ef2014-09-11 13:41:09 +08004565{
4566 if (acb->aiocb_info->cancel_async) {
4567 acb->aiocb_info->cancel_async(acb);
4568 }
bellard83f64092006-08-01 16:21:11 +00004569}
4570
4571/**************************************************************/
4572/* async block device emulation */
4573
/* Per-request state for AIO emulated on top of the driver's synchronous
 * read/write entry points (see bdrv_aio_rw_vector()). */
typedef struct BlockAIOCBSync {
    BlockAIOCB common;
    QEMUBH *bh;          /* bottom half that delivers the completion */
    int ret;             /* result passed to the completion callback */
    /* vector translation state */
    QEMUIOVector *qiov;  /* caller's vector; read data is copied back here */
    uint8_t *bounce;     /* linear bounce buffer for the sync driver API */
    int is_write;
} BlockAIOCBSync;

static const AIOCBInfo bdrv_em_aiocb_info = {
    .aiocb_size = sizeof(BlockAIOCBSync),
};
4587
/* Bottom half: deliver the completion of an emulated (bounce-buffer) AIO
 * request and release its resources. */
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockAIOCBSync *acb = opaque;

    /* For successful reads, copy the bounce buffer back into the caller's
     * scatter/gather vector before signalling completion. */
    if (!acb->is_write && acb->ret >= 0) {
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_unref(acb);
}
bellardbeac80c2006-06-26 20:08:57 +00004601
/*
 * Emulate AIO on top of the driver's synchronous bdrv_read/bdrv_write:
 * the I/O is performed immediately through a bounce buffer and the result
 * is reported from a bottom half, preserving AIO completion semantics.
 * Errors (including bounce-buffer allocation failure) are delivered via
 * the completion callback, never as a NULL return.
 */
static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                      int64_t sector_num,
                                      QEMUIOVector *qiov,
                                      int nb_sectors,
                                      BlockCompletionFunc *cb,
                                      void *opaque,
                                      int is_write)

{
    BlockAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* May fail; checked below so the error is reported via the callback. */
    acb->bounce = qemu_try_blockalign(bs, qiov->size);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);

    if (acb->bounce == NULL) {
        acb->ret = -ENOMEM;
    } else if (is_write) {
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
4632
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004633static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004634 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004635 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004636{
aliguorif141eaf2009-04-07 18:43:24 +00004637 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004638}
4639
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004640static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004641 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004642 BlockCompletionFunc *cb, void *opaque)
aliguorif141eaf2009-04-07 18:43:24 +00004643{
4644 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4645}
4646
Kevin Wolf68485422011-06-30 10:05:46 +02004647
/* Per-request state for AIO requests emulated on top of coroutines. */
typedef struct BlockAIOCBCoroutine {
    BlockAIOCB common;
    BlockRequest req;   /* parameters handed to the coroutine entry point */
    bool is_write;
    bool *done;
    QEMUBH* bh;         /* bottom half that delivers the completion */
} BlockAIOCBCoroutine;

static const AIOCBInfo bdrv_em_co_aiocb_info = {
    .aiocb_size = sizeof(BlockAIOCBCoroutine),
};
4659
/* Bottom half: deliver the completion callback for a coroutine-emulated
 * AIO request, then drop the request's reference. */
static void bdrv_co_em_bh(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);

    qemu_bh_delete(acb->bh);
    qemu_aio_unref(acb);
}
4669
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004670/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4671static void coroutine_fn bdrv_co_do_rw(void *opaque)
4672{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004673 BlockAIOCBCoroutine *acb = opaque;
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004674 BlockDriverState *bs = acb->common.bs;
4675
4676 if (!acb->is_write) {
4677 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004678 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004679 } else {
4680 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004681 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004682 }
4683
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004684 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004685 qemu_bh_schedule(acb->bh);
4686}
4687
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004688static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4689 int64_t sector_num,
4690 QEMUIOVector *qiov,
4691 int nb_sectors,
4692 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004693 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004694 void *opaque,
4695 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004696{
4697 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004698 BlockAIOCBCoroutine *acb;
Kevin Wolf68485422011-06-30 10:05:46 +02004699
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004700 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004701 acb->req.sector = sector_num;
4702 acb->req.nb_sectors = nb_sectors;
4703 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004704 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004705 acb->is_write = is_write;
4706
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004707 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004708 qemu_coroutine_enter(co, acb);
4709
4710 return &acb->common;
4711}
4712
/* Coroutine entry point for bdrv_aio_flush(): run the flush and schedule
 * a bottom half to deliver the completion callback. */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
4722
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004723BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004724 BlockCompletionFunc *cb, void *opaque)
Alexander Graf016f5cf2010-05-26 17:51:49 +02004725{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004726 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004727
Paolo Bonzini07f07612011-10-17 12:32:12 +02004728 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004729 BlockAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004730
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004731 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004732
Paolo Bonzini07f07612011-10-17 12:32:12 +02004733 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4734 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004735
Alexander Graf016f5cf2010-05-26 17:51:49 +02004736 return &acb->common;
4737}
4738
/* Coroutine entry point for bdrv_aio_discard(): run the discard and
 * schedule a bottom half to deliver the completion callback. */
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
4748
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004749BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
Paolo Bonzini4265d622011-10-17 12:32:14 +02004750 int64_t sector_num, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004751 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzini4265d622011-10-17 12:32:14 +02004752{
4753 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004754 BlockAIOCBCoroutine *acb;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004755
4756 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4757
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004758 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004759 acb->req.sector = sector_num;
4760 acb->req.nb_sectors = nb_sectors;
4761 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4762 qemu_coroutine_enter(co, acb);
4763
4764 return &acb->common;
4765}
4766
/* Run the registration hooks of all built-in block drivers. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
pbrookce1a14d2006-08-07 02:38:06 +00004771
/* Like bdrv_init(), but additionally enable the configured block driver
 * whitelist (restricts which drivers may be used). */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
4777
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004778void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004779 BlockCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004780{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004781 BlockAIOCB *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004782
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004783 acb = g_slice_alloc(aiocb_info->aiocb_size);
4784 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004785 acb->bs = bs;
4786 acb->cb = cb;
4787 acb->opaque = opaque;
Fam Zhengf197fe22014-09-11 13:41:08 +08004788 acb->refcnt = 1;
pbrookce1a14d2006-08-07 02:38:06 +00004789 return acb;
4790}
4791
Fam Zhengf197fe22014-09-11 13:41:08 +08004792void qemu_aio_ref(void *p)
4793{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004794 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004795 acb->refcnt++;
4796}
4797
Fam Zheng80074292014-09-11 13:41:28 +08004798void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00004799{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004800 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004801 assert(acb->refcnt > 0);
4802 if (--acb->refcnt == 0) {
4803 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4804 }
pbrookce1a14d2006-08-07 02:38:06 +00004805}
bellard19cb3732006-08-19 11:45:59 +00004806
4807/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004808/* Coroutine block device emulation */
4809
/* Glue used to wait for an AIO request from coroutine context. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to re-enter on completion */
    int ret;               /* completion status of the request */
} CoroutineIOCompletion;
4814
/* AIO completion callback: record the result and resume the coroutine
 * that is yielded in bdrv_co_io_em(), bdrv_co_flush() or
 * bdrv_co_discard(). */
static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
4822
4823static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4824 int nb_sectors, QEMUIOVector *iov,
4825 bool is_write)
4826{
4827 CoroutineIOCompletion co = {
4828 .coroutine = qemu_coroutine_self(),
4829 };
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004830 BlockAIOCB *acb;
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004831
4832 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004833 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4834 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004835 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004836 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4837 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004838 }
4839
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01004840 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004841 if (!acb) {
4842 return -EIO;
4843 }
4844 qemu_coroutine_yield();
4845
4846 return co.ret;
4847}
4848
4849static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4850 int64_t sector_num, int nb_sectors,
4851 QEMUIOVector *iov)
4852{
4853 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4854}
4855
4856static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4857 int64_t sector_num, int nb_sectors,
4858 QEMUIOVector *iov)
4859{
4860 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4861}
4862
/* Coroutine entry point used by bdrv_flush() to run bdrv_co_flush(). */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
4869
/*
 * Flush cached data for @bs (format layer) and then its underlying
 * protocol layer to stable storage.
 *
 * Returns 0 on success, including the no-op cases (NULL @bs, no medium,
 * read-only image), and a negative errno on failure.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    /* Try the driver's flush hooks in order of preference: coroutine
     * hook, then AIO hook (bridged with bdrv_co_io_em_complete()), and
     * finally assume success for drivers that have neither. */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bdrv_co_flush(bs->file);
}
4932
/*
 * Drop cached metadata for @bs so it is re-read from the image.
 * Only acts on images opened with BDRV_O_INCOMING (presumably set while
 * awaiting incoming migration data -- confirm against callers); the flag
 * is cleared here. Errors are reported through @errp.
 */
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    Error *local_err = NULL;
    int ret;

    if (!bs->drv) {
        return;
    }

    if (!(bs->open_flags & BDRV_O_INCOMING)) {
        return;
    }
    bs->open_flags &= ~BDRV_O_INCOMING;

    /* Prefer the driver hook; otherwise recurse into the protocol layer. */
    if (bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs, &local_err);
    } else if (bs->file) {
        bdrv_invalidate_cache(bs->file, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /* The image size may have changed behind our back. */
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        return;
    }
}
4963
/* Invalidate caches of every registered BlockDriverState, taking each
 * one's AioContext around the call. Stops at the first error. */
void bdrv_invalidate_cache_all(Error **errp)
{
    BlockDriverState *bs;
    Error *local_err = NULL;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_invalidate_cache(bs, &local_err);
        aio_context_release(aio_context);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}
4981
Paolo Bonzini07f07612011-10-17 12:32:12 +02004982int bdrv_flush(BlockDriverState *bs)
4983{
4984 Coroutine *co;
4985 RwCo rwco = {
4986 .bs = bs,
4987 .ret = NOT_DONE,
4988 };
4989
4990 if (qemu_in_coroutine()) {
4991 /* Fast-path if already in coroutine context */
4992 bdrv_flush_co_entry(&rwco);
4993 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004994 AioContext *aio_context = bdrv_get_aio_context(bs);
4995
Paolo Bonzini07f07612011-10-17 12:32:12 +02004996 co = qemu_coroutine_create(bdrv_flush_co_entry);
4997 qemu_coroutine_enter(co, &rwco);
4998 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004999 aio_poll(aio_context, true);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005000 }
5001 }
5002
5003 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005004}
5005
/* Parameter/result bundle for the bdrv_discard() coroutine entry point. */
typedef struct DiscardCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int ret;          /* NOT_DONE until the coroutine finishes */
} DiscardCo;
/* Coroutine entry point used by bdrv_discard() to run bdrv_co_discard(). */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    DiscardCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
5018
/* If no limit is specified in the BlockLimits, use a default of
 * 32768 512-byte sectors (16 MiB) per discard request.
 */
#define MAX_DISCARD_DEFAULT 32768
5023
Paolo Bonzini4265d622011-10-17 12:32:14 +02005024int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5025 int nb_sectors)
5026{
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005027 int max_discard;
5028
Paolo Bonzini4265d622011-10-17 12:32:14 +02005029 if (!bs->drv) {
5030 return -ENOMEDIUM;
5031 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
5032 return -EIO;
5033 } else if (bs->read_only) {
5034 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005035 }
5036
Fam Zhenge4654d22013-11-13 18:29:43 +08005037 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005038
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005039 /* Do nothing if disabled. */
5040 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5041 return 0;
5042 }
5043
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005044 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005045 return 0;
5046 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005047
5048 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
5049 while (nb_sectors > 0) {
5050 int ret;
5051 int num = nb_sectors;
5052
5053 /* align request */
5054 if (bs->bl.discard_alignment &&
5055 num >= bs->bl.discard_alignment &&
5056 sector_num % bs->bl.discard_alignment) {
5057 if (num > bs->bl.discard_alignment) {
5058 num = bs->bl.discard_alignment;
5059 }
5060 num -= sector_num % bs->bl.discard_alignment;
5061 }
5062
5063 /* limit request size */
5064 if (num > max_discard) {
5065 num = max_discard;
5066 }
5067
5068 if (bs->drv->bdrv_co_discard) {
5069 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5070 } else {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005071 BlockAIOCB *acb;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005072 CoroutineIOCompletion co = {
5073 .coroutine = qemu_coroutine_self(),
5074 };
5075
5076 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5077 bdrv_co_io_em_complete, &co);
5078 if (acb == NULL) {
5079 return -EIO;
5080 } else {
5081 qemu_coroutine_yield();
5082 ret = co.ret;
5083 }
5084 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005085 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005086 return ret;
5087 }
5088
5089 sector_num += num;
5090 nb_sectors -= num;
5091 }
5092 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005093}
5094
5095int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5096{
5097 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005098 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005099 .bs = bs,
5100 .sector_num = sector_num,
5101 .nb_sectors = nb_sectors,
5102 .ret = NOT_DONE,
5103 };
5104
5105 if (qemu_in_coroutine()) {
5106 /* Fast-path if already in coroutine context */
5107 bdrv_discard_co_entry(&rwco);
5108 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005109 AioContext *aio_context = bdrv_get_aio_context(bs);
5110
Paolo Bonzini4265d622011-10-17 12:32:14 +02005111 co = qemu_coroutine_create(bdrv_discard_co_entry);
5112 qemu_coroutine_enter(co, &rwco);
5113 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005114 aio_poll(aio_context, true);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005115 }
5116 }
5117
5118 return rwco.ret;
5119}
5120
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005121/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005122/* removable device support */
5123
5124/**
5125 * Return TRUE if the media is present
5126 */
5127int bdrv_is_inserted(BlockDriverState *bs)
5128{
5129 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005130
bellard19cb3732006-08-19 11:45:59 +00005131 if (!drv)
5132 return 0;
5133 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005134 return 1;
5135 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005136}
5137
5138/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005139 * Return whether the media changed since the last call to this
5140 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005141 */
5142int bdrv_media_changed(BlockDriverState *bs)
5143{
5144 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005145
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005146 if (drv && drv->bdrv_media_changed) {
5147 return drv->bdrv_media_changed(bs);
5148 }
5149 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005150}
5151
5152/**
5153 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5154 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005155void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005156{
5157 BlockDriver *drv = bs->drv;
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005158 const char *device_name;
bellard19cb3732006-08-19 11:45:59 +00005159
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005160 if (drv && drv->bdrv_eject) {
5161 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005162 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005163
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005164 device_name = bdrv_get_device_name(bs);
5165 if (device_name[0] != '\0') {
5166 qapi_event_send_device_tray_moved(device_name,
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02005167 eject_flag, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005168 }
bellard19cb3732006-08-19 11:45:59 +00005169}
5170
bellard19cb3732006-08-19 11:45:59 +00005171/**
5172 * Lock or unlock the media (if it is locked, the user won't be able
5173 * to eject it manually).
5174 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005175void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005176{
5177 BlockDriver *drv = bs->drv;
5178
Markus Armbruster025e8492011-09-06 18:58:47 +02005179 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005180
Markus Armbruster025e8492011-09-06 18:58:47 +02005181 if (drv && drv->bdrv_lock_medium) {
5182 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005183 }
5184}
ths985a03b2007-12-24 16:10:43 +00005185
5186/* needed for generic scsi interface */
5187
5188int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5189{
5190 BlockDriver *drv = bs->drv;
5191
5192 if (drv && drv->bdrv_ioctl)
5193 return drv->bdrv_ioctl(bs, req, buf);
5194 return -ENOTSUP;
5195}
aliguori7d780662009-03-12 19:57:08 +00005196
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005197BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
aliguori221f7152009-03-28 17:28:41 +00005198 unsigned long int req, void *buf,
Markus Armbruster097310b2014-10-07 13:59:15 +02005199 BlockCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005200{
aliguori221f7152009-03-28 17:28:41 +00005201 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005202
aliguori221f7152009-03-28 17:28:41 +00005203 if (drv && drv->bdrv_aio_ioctl)
5204 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5205 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005206}
aliguorie268ca52009-04-22 20:20:00 +00005207
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005208void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005209{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005210 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005211}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005212
aliguorie268ca52009-04-22 20:20:00 +00005213void *qemu_blockalign(BlockDriverState *bs, size_t size)
5214{
Kevin Wolf339064d2013-11-28 10:23:32 +01005215 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005216}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005217
Max Reitz9ebd8442014-10-22 14:09:27 +02005218void *qemu_blockalign0(BlockDriverState *bs, size_t size)
5219{
5220 return memset(qemu_blockalign(bs, size), 0, size);
5221}
5222
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005223void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5224{
5225 size_t align = bdrv_opt_mem_align(bs);
5226
5227 /* Ensure that NULL is never returned on success */
5228 assert(align > 0);
5229 if (size == 0) {
5230 size = align;
5231 }
5232
5233 return qemu_try_memalign(align, size);
5234}
5235
Max Reitz9ebd8442014-10-22 14:09:27 +02005236void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
5237{
5238 void *mem = qemu_try_blockalign(bs, size);
5239
5240 if (mem) {
5241 memset(mem, 0, size);
5242 }
5243
5244 return mem;
5245}
5246
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005247/*
5248 * Check if all memory in this vector is sector aligned.
5249 */
5250bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5251{
5252 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005253 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005254
5255 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005256 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005257 return false;
5258 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005259 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005260 return false;
5261 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005262 }
5263
5264 return true;
5265}
5266
Fam Zhengb8afb522014-04-16 09:34:30 +08005267BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5268 Error **errp)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005269{
5270 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005271 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005272
Paolo Bonzini50717e92013-01-21 17:09:45 +01005273 assert((granularity & (granularity - 1)) == 0);
5274
Fam Zhenge4654d22013-11-13 18:29:43 +08005275 granularity >>= BDRV_SECTOR_BITS;
5276 assert(granularity);
Markus Armbruster57322b72014-06-26 13:23:22 +02005277 bitmap_size = bdrv_nb_sectors(bs);
Fam Zhengb8afb522014-04-16 09:34:30 +08005278 if (bitmap_size < 0) {
5279 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5280 errno = -bitmap_size;
5281 return NULL;
5282 }
Markus Armbruster5839e532014-08-19 10:31:08 +02005283 bitmap = g_new0(BdrvDirtyBitmap, 1);
Fam Zhenge4654d22013-11-13 18:29:43 +08005284 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5285 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5286 return bitmap;
5287}
5288
5289void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5290{
5291 BdrvDirtyBitmap *bm, *next;
5292 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5293 if (bm == bitmap) {
5294 QLIST_REMOVE(bitmap, list);
5295 hbitmap_free(bitmap->bitmap);
5296 g_free(bitmap);
5297 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005298 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005299 }
5300}
5301
Fam Zheng21b56832013-11-13 18:29:44 +08005302BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5303{
5304 BdrvDirtyBitmap *bm;
5305 BlockDirtyInfoList *list = NULL;
5306 BlockDirtyInfoList **plist = &list;
5307
5308 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
Markus Armbruster5839e532014-08-19 10:31:08 +02005309 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5310 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
Fam Zheng21b56832013-11-13 18:29:44 +08005311 info->count = bdrv_get_dirty_count(bs, bm);
5312 info->granularity =
5313 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5314 entry->value = info;
5315 *plist = entry;
5316 plist = &entry->next;
5317 }
5318
5319 return list;
5320}
5321
Fam Zhenge4654d22013-11-13 18:29:43 +08005322int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005323{
Fam Zhenge4654d22013-11-13 18:29:43 +08005324 if (bitmap) {
5325 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005326 } else {
5327 return 0;
5328 }
5329}
5330
Fam Zhenge4654d22013-11-13 18:29:43 +08005331void bdrv_dirty_iter_init(BlockDriverState *bs,
5332 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005333{
Fam Zhenge4654d22013-11-13 18:29:43 +08005334 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005335}
5336
5337void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5338 int nr_sectors)
5339{
Fam Zhenge4654d22013-11-13 18:29:43 +08005340 BdrvDirtyBitmap *bitmap;
5341 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5342 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005343 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005344}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005345
Fam Zhenge4654d22013-11-13 18:29:43 +08005346void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
5347{
5348 BdrvDirtyBitmap *bitmap;
5349 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5350 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5351 }
5352}
5353
/* Return the number of dirty sectors recorded in @bitmap. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
5358
Fam Zheng9fcb0252013-08-23 09:14:46 +08005359/* Get a reference to bs */
5360void bdrv_ref(BlockDriverState *bs)
5361{
5362 bs->refcnt++;
5363}
5364
/* Release a previously grabbed reference to bs.
 * If after releasing, reference count is zero, the BlockDriverState is
 * deleted.  A NULL bs is accepted and ignored. */
void bdrv_unref(BlockDriverState *bs)
{
    if (!bs) {
        return;
    }
    /* Catch unbalanced unref (double release) early */
    assert(bs->refcnt > 0);
    if (--bs->refcnt == 0) {
        bdrv_delete(bs);
    }
}
5378
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005379struct BdrvOpBlocker {
5380 Error *reason;
5381 QLIST_ENTRY(BdrvOpBlocker) list;
5382};
5383
5384bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5385{
5386 BdrvOpBlocker *blocker;
5387 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5388 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5389 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5390 if (errp) {
5391 error_setg(errp, "Device '%s' is busy: %s",
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005392 bdrv_get_device_name(bs),
5393 error_get_pretty(blocker->reason));
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005394 }
5395 return true;
5396 }
5397 return false;
5398}
5399
5400void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5401{
5402 BdrvOpBlocker *blocker;
5403 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5404
Markus Armbruster5839e532014-08-19 10:31:08 +02005405 blocker = g_new0(BdrvOpBlocker, 1);
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005406 blocker->reason = reason;
5407 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5408}
5409
5410void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5411{
5412 BdrvOpBlocker *blocker, *next;
5413 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5414 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5415 if (blocker->reason == reason) {
5416 QLIST_REMOVE(blocker, list);
5417 g_free(blocker);
5418 }
5419 }
5420}
5421
5422void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5423{
5424 int i;
5425 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5426 bdrv_op_block(bs, i, reason);
5427 }
5428}
5429
5430void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5431{
5432 int i;
5433 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5434 bdrv_op_unblock(bs, i, reason);
5435 }
5436}
5437
5438bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5439{
5440 int i;
5441
5442 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5443 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5444 return false;
5445 }
5446 }
5447 return true;
5448}
5449
Luiz Capitulino28a72822011-09-26 17:43:50 -03005450void bdrv_iostatus_enable(BlockDriverState *bs)
5451{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005452 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005453 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005454}
5455
5456/* The I/O status is only enabled if the drive explicitly
5457 * enables it _and_ the VM is configured to stop on errors */
5458bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5459{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005460 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005461 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5462 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5463 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005464}
5465
/* Turn off I/O status tracking for @bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
5470
/* Reset the I/O status of @bs (and of its block job, if any) back to OK,
 * but only when status tracking is enabled. */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}
5480
Luiz Capitulino28a72822011-09-26 17:43:50 -03005481void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5482{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005483 assert(bdrv_iostatus_is_enabled(bs));
5484 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005485 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5486 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005487 }
5488}
5489
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005490void bdrv_img_create(const char *filename, const char *fmt,
5491 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005492 char *options, uint64_t img_size, int flags,
5493 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005494{
Chunyan Liu83d05212014-06-05 17:20:51 +08005495 QemuOptsList *create_opts = NULL;
5496 QemuOpts *opts = NULL;
5497 const char *backing_fmt, *backing_file;
5498 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005499 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005500 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005501 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005502 int ret = 0;
5503
5504 /* Find driver and parse its options */
5505 drv = bdrv_find_format(fmt);
5506 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005507 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005508 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005509 }
5510
Kevin Wolf98289622013-07-10 15:47:39 +02005511 proto_drv = bdrv_find_protocol(filename, true);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005512 if (!proto_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005513 error_setg(errp, "Unknown protocol '%s'", filename);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005514 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005515 }
5516
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005517 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5518 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005519
5520 /* Create parameter list with default values */
Chunyan Liu83d05212014-06-05 17:20:51 +08005521 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5522 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005523
5524 /* Parse -o options */
5525 if (options) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005526 if (qemu_opts_do_parse(opts, options, NULL) != 0) {
5527 error_setg(errp, "Invalid options for file format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005528 goto out;
5529 }
5530 }
5531
5532 if (base_filename) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005533 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005534 error_setg(errp, "Backing file not supported for file format '%s'",
5535 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005536 goto out;
5537 }
5538 }
5539
5540 if (base_fmt) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005541 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005542 error_setg(errp, "Backing file format not supported for file "
5543 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005544 goto out;
5545 }
5546 }
5547
Chunyan Liu83d05212014-06-05 17:20:51 +08005548 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5549 if (backing_file) {
5550 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005551 error_setg(errp, "Error: Trying to create an image with the "
5552 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005553 goto out;
5554 }
5555 }
5556
Chunyan Liu83d05212014-06-05 17:20:51 +08005557 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5558 if (backing_fmt) {
5559 backing_drv = bdrv_find_format(backing_fmt);
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005560 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005561 error_setg(errp, "Unknown backing file format '%s'",
Chunyan Liu83d05212014-06-05 17:20:51 +08005562 backing_fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005563 goto out;
5564 }
5565 }
5566
5567 // The size for the image must always be specified, with one exception:
5568 // If we are using a backing file, we can obtain the size from there
Chunyan Liu83d05212014-06-05 17:20:51 +08005569 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5570 if (size == -1) {
5571 if (backing_file) {
Max Reitz66f6b812013-12-03 14:57:52 +01005572 BlockDriverState *bs;
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005573 int64_t size;
Paolo Bonzini63090da2012-04-12 14:01:03 +02005574 int back_flags;
5575
5576 /* backing files always opened read-only */
5577 back_flags =
5578 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005579
Max Reitzf67503e2014-02-18 18:33:05 +01005580 bs = NULL;
Chunyan Liu83d05212014-06-05 17:20:51 +08005581 ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005582 backing_drv, &local_err);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005583 if (ret < 0) {
Max Reitzcc84d902013-09-06 17:14:26 +02005584 error_setg_errno(errp, -ret, "Could not open '%s': %s",
Chunyan Liu83d05212014-06-05 17:20:51 +08005585 backing_file,
Max Reitzcc84d902013-09-06 17:14:26 +02005586 error_get_pretty(local_err));
5587 error_free(local_err);
5588 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005589 goto out;
5590 }
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005591 size = bdrv_getlength(bs);
5592 if (size < 0) {
5593 error_setg_errno(errp, -size, "Could not get size of '%s'",
5594 backing_file);
5595 bdrv_unref(bs);
5596 goto out;
5597 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005598
Chunyan Liu83d05212014-06-05 17:20:51 +08005599 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
Max Reitz66f6b812013-12-03 14:57:52 +01005600
5601 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005602 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005603 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005604 goto out;
5605 }
5606 }
5607
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005608 if (!quiet) {
5609 printf("Formatting '%s', fmt=%s ", filename, fmt);
Chunyan Liu83d05212014-06-05 17:20:51 +08005610 qemu_opts_print(opts);
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005611 puts("");
5612 }
Chunyan Liu83d05212014-06-05 17:20:51 +08005613
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005614 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08005615
Max Reitzcc84d902013-09-06 17:14:26 +02005616 if (ret == -EFBIG) {
5617 /* This is generally a better message than whatever the driver would
5618 * deliver (especially because of the cluster_size_hint), since that
5619 * is most probably not much different from "image too large". */
5620 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08005621 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02005622 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005623 }
Max Reitzcc84d902013-09-06 17:14:26 +02005624 error_setg(errp, "The image size is too large for file format '%s'"
5625 "%s", fmt, cluster_size_hint);
5626 error_free(local_err);
5627 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005628 }
5629
5630out:
Chunyan Liu83d05212014-06-05 17:20:51 +08005631 qemu_opts_del(opts);
5632 qemu_opts_free(create_opts);
Markus Armbruster84d18f02014-01-30 15:07:28 +01005633 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005634 error_propagate(errp, local_err);
5635 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005636}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005637
/* Return the AioContext this BlockDriverState is currently bound to. */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
5642
/*
 * Detach @bs (and recursively its file/backing children) from its
 * AioContext.  Teardown order matters: registered notifiers run first,
 * then throttling, then the driver hook, then the children; finally the
 * context pointer is cleared.  No-op if the device has no driver.
 */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf;

    if (!bs->drv) {
        return;
    }

    /* Let interested parties (e.g. devices) drop their context state first */
    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
        baf->detach_aio_context(baf->opaque);
    }

    if (bs->io_limits_enabled) {
        throttle_detach_aio_context(&bs->throttle_state);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file);
    }
    if (bs->backing_hd) {
        bdrv_detach_aio_context(bs->backing_hd);
    }

    bs->aio_context = NULL;
}
5670
/*
 * Attach @bs (and recursively its backing/file children) to @new_context.
 * The order mirrors bdrv_detach_aio_context() in reverse: children and the
 * driver hook first, throttling next, and the registered notifiers last.
 * No-op if the device has no driver.
 */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *ban;

    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing_hd) {
        bdrv_attach_aio_context(bs->backing_hd, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    if (bs->io_limits_enabled) {
        throttle_attach_aio_context(&bs->throttle_state, new_context);
    }

    /* Notify interested parties (e.g. devices) once the tree is attached */
    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
        ban->attached_aio_context(new_context, ban->opaque);
    }
}
5699
/*
 * Move @bs to @new_context.  Must be called from the current (old)
 * AioContext's thread; all in-flight requests are drained first.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005713
Max Reitz33384422014-06-20 21:57:33 +02005714void bdrv_add_aio_context_notifier(BlockDriverState *bs,
5715 void (*attached_aio_context)(AioContext *new_context, void *opaque),
5716 void (*detach_aio_context)(void *opaque), void *opaque)
5717{
5718 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
5719 *ban = (BdrvAioNotifier){
5720 .attached_aio_context = attached_aio_context,
5721 .detach_aio_context = detach_aio_context,
5722 .opaque = opaque
5723 };
5724
5725 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
5726}
5727
5728void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
5729 void (*attached_aio_context)(AioContext *,
5730 void *),
5731 void (*detach_aio_context)(void *),
5732 void *opaque)
5733{
5734 BdrvAioNotifier *ban, *ban_next;
5735
5736 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5737 if (ban->attached_aio_context == attached_aio_context &&
5738 ban->detach_aio_context == detach_aio_context &&
5739 ban->opaque == opaque)
5740 {
5741 QLIST_REMOVE(ban, list);
5742 g_free(ban);
5743
5744 return;
5745 }
5746 }
5747
5748 abort();
5749}
5750
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005751void bdrv_add_before_write_notifier(BlockDriverState *bs,
5752 NotifierWithReturn *notifier)
5753{
5754 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5755}
Max Reitz6f176b42013-09-03 10:09:50 +02005756
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005757int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
Max Reitz6f176b42013-09-03 10:09:50 +02005758{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005759 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02005760 return -ENOTSUP;
5761 }
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005762 return bs->drv->bdrv_amend_options(bs, opts);
Max Reitz6f176b42013-09-03 10:09:50 +02005763}
Benoît Canetf6186f42013-10-02 14:33:48 +02005764
Benoît Canetb5042a32014-03-03 19:11:34 +01005765/* This function will be called by the bdrv_recurse_is_first_non_filter method
5766 * of block filter and by bdrv_is_first_non_filter.
5767 * It is used to test if the given bs is the candidate or recurse more in the
5768 * node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01005769 */
Benoît Canet212a5a82014-01-23 21:31:36 +01005770bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5771 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005772{
Benoît Canetb5042a32014-03-03 19:11:34 +01005773 /* return false if basic checks fails */
5774 if (!bs || !bs->drv) {
5775 return false;
5776 }
5777
5778 /* the code reached a non block filter driver -> check if the bs is
5779 * the same as the candidate. It's the recursion termination condition.
5780 */
5781 if (!bs->drv->is_filter) {
5782 return bs == candidate;
5783 }
5784 /* Down this path the driver is a block filter driver */
5785
5786 /* If the block filter recursion method is defined use it to recurse down
5787 * the node graph.
5788 */
5789 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01005790 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5791 }
5792
Benoît Canetb5042a32014-03-03 19:11:34 +01005793 /* the driver is a block filter but don't allow to recurse -> return false
5794 */
5795 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01005796}
5797
5798/* This function checks if the candidate is the first non filter bs down it's
5799 * bs chain. Since we don't have pointers to parents it explore all bs chains
5800 * from the top. Some filters can choose not to pass down the recursion.
5801 */
5802bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5803{
5804 BlockDriverState *bs;
5805
5806 /* walk down the bs forest recursively */
5807 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5808 bool perm;
5809
Benoît Canetb5042a32014-03-03 19:11:34 +01005810 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01005811 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01005812
5813 /* candidate is the first non filter */
5814 if (perm) {
5815 return true;
5816 }
5817 }
5818
5819 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005820}
Benoît Canet09158f02014-06-27 18:25:25 +02005821
5822BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
5823{
5824 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5825 if (!to_replace_bs) {
5826 error_setg(errp, "Node name '%s' not found", node_name);
5827 return NULL;
5828 }
5829
5830 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5831 return NULL;
5832 }
5833
5834 /* We don't want arbitrary node of the BDS chain to be replaced only the top
5835 * most non filter in order to prevent data corruption.
5836 * Another benefit is that this tests exclude backing files which are
5837 * blocked by the backing blockers.
5838 */
5839 if (!bdrv_is_first_non_filter(to_replace_bs)) {
5840 error_setg(errp, "Only top most non filter can be replaced");
5841 return NULL;
5842 }
5843
5844 return to_replace_bs;
5845}
Ming Lei448ad912014-07-04 18:04:33 +08005846
5847void bdrv_io_plug(BlockDriverState *bs)
5848{
5849 BlockDriver *drv = bs->drv;
5850 if (drv && drv->bdrv_io_plug) {
5851 drv->bdrv_io_plug(bs);
5852 } else if (bs->file) {
5853 bdrv_io_plug(bs->file);
5854 }
5855}
5856
5857void bdrv_io_unplug(BlockDriverState *bs)
5858{
5859 BlockDriver *drv = bs->drv;
5860 if (drv && drv->bdrv_io_unplug) {
5861 drv->bdrv_io_unplug(bs);
5862 } else if (bs->file) {
5863 bdrv_io_unplug(bs->file);
5864 }
5865}
5866
5867void bdrv_flush_io_queue(BlockDriverState *bs)
5868{
5869 BlockDriver *drv = bs->drv;
5870 if (drv && drv->bdrv_flush_io_queue) {
5871 drv->bdrv_flush_io_queue(bs);
5872 } else if (bs->file) {
5873 bdrv_flush_io_queue(bs->file);
5874 }
5875}
Max Reitz91af7012014-07-18 20:24:56 +02005876
5877static bool append_open_options(QDict *d, BlockDriverState *bs)
5878{
5879 const QDictEntry *entry;
5880 bool found_any = false;
5881
5882 for (entry = qdict_first(bs->options); entry;
5883 entry = qdict_next(bs->options, entry))
5884 {
5885 /* Only take options for this level and exclude all non-driver-specific
5886 * options */
5887 if (!strchr(qdict_entry_key(entry), '.') &&
5888 strcmp(qdict_entry_key(entry), "node-name"))
5889 {
5890 qobject_incref(qdict_entry_value(entry));
5891 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
5892 found_any = true;
5893 }
5894 }
5895
5896 return found_any;
5897}
5898
/* Updates the following BDS fields:
 * - exact_filename: A filename which may be used for opening a block device
 *                   which (mostly) equals the given BDS (even without any
 *                   other options; so reading and writing must return the same
 *                   results, but caching etc. may be different)
 * - full_open_options: Options which, when given when opening a block device
 *                      (without a filename), result in a BDS (mostly)
 *                      equalling the given one
 * - filename: If exact_filename is set, it is copied here. Otherwise,
 *             full_open_options is converted to a JSON object, prefixed with
 *             "json:" (for use through the JSON pseudo protocol) and put here.
 */
void bdrv_refresh_filename(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    QDict *opts;

    /* Without a driver there is no filename information to refresh */
    if (!drv) {
        return;
    }

    /* This BDS's file name will most probably depend on its file's name, so
     * refresh that first (recursion is bounded by the depth of the BDS
     * chain) */
    if (bs->file) {
        bdrv_refresh_filename(bs->file);
    }

    if (drv->bdrv_refresh_filename) {
        /* Obsolete information is of no use here, so drop the old file name
         * information before refreshing it */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        /* Let the driver-specific implementation fill in exact_filename
         * and full_open_options as far as it can */
        drv->bdrv_refresh_filename(bs);
    } else if (bs->file) {
        /* Try to reconstruct valid information from the underlying file */
        bool has_open_options;

        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        opts = qdict_new();
        has_open_options = append_open_options(opts, bs);

        /* If no specific options have been given for this BDS, the filename of
         * the underlying file should suffice for this one as well */
        if (bs->file->exact_filename[0] && !has_open_options) {
            strcpy(bs->exact_filename, bs->file->exact_filename);
        }
        /* Reconstructing the full options QDict is simple for most format block
         * drivers, as long as the full options are known for the underlying
         * file BDS. The full options QDict of that file BDS should somehow
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->full_open_options) {
            qdict_put_obj(opts, "driver",
                          QOBJECT(qstring_from_str(drv->format_name)));
            /* Take a reference for the "file" entry; the child keeps its
             * own reference to full_open_options */
            QINCREF(bs->file->full_open_options);
            qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));

            /* Ownership of opts is transferred to bs->full_open_options */
            bs->full_open_options = opts;
        } else {
            QDECREF(opts);
        }
    } else if (!bs->full_open_options && qdict_size(bs->options)) {
        /* There is no underlying file BDS (at least referenced by BDS.file),
         * so the full options QDict should be equal to the options given
         * specifically for this block device when it was opened (plus the
         * driver specification).
         * Because those options don't change, there is no need to update
         * full_open_options when it's already set. */

        opts = qdict_new();
        append_open_options(opts, bs);
        qdict_put_obj(opts, "driver",
                      QOBJECT(qstring_from_str(drv->format_name)));

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
             * require this filename to be parsed), but we have to find some
             * default solution here, so just include it. If some block driver
             * does not support pure options without any filename at all or
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
            qdict_put_obj(opts, "filename",
                          QOBJECT(qstring_from_str(bs->exact_filename)));
        }

        bs->full_open_options = opts;
    }

    /* Finally derive the user-visible filename: prefer the exact filename,
     * otherwise fall back to the "json:{...}" pseudo-protocol form of
     * full_open_options */
    if (bs->exact_filename[0]) {
        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
    } else if (bs->full_open_options) {
        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
                 qstring_get_str(json));
        QDECREF(json);
    }
}
6007/* This accessor function purpose is to allow the device models to access the
6008 * BlockAcctStats structure embedded inside a BlockDriverState without being
6009 * aware of the BlockDriverState structure layout.
6010 * It will go away when the BlockAcctStats structure will be moved inside
6011 * the device models.
6012 */
6013BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6014{
6015 return &bs->stats;
6016}