blob: 41793419d8e1e8b3d429402af077988a02bec899 [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010027#include "block/block_int.h"
28#include "block/blockjob.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010029#include "qemu/module.h"
Paolo Bonzini7b1b5d12012-12-17 18:19:43 +010030#include "qapi/qmp/qjson.h"
Markus Armbrusterbfb197e2014-10-07 13:59:11 +020031#include "sysemu/block-backend.h"
Paolo Bonzini9c17d612012-12-17 18:20:04 +010032#include "sysemu/sysemu.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010033#include "qemu/notify.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010034#include "block/coroutine.h"
Benoît Canetc13163f2014-01-23 21:31:34 +010035#include "block/qapi.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030036#include "qmp-commands.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010037#include "qemu/timer.h"
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +020038#include "qapi-event.h"
bellardfc01f7e2003-06-30 10:03:06 +000039
Juan Quintela71e72a12009-07-27 16:12:56 +020040#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000044#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000045#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000046#include <sys/disk.h>
47#endif
blueswir1c5e97232009-03-07 20:06:23 +000048#endif
bellard7674e7b2005-04-26 21:59:26 +000049
aliguori49dc7682009-03-08 16:26:59 +000050#ifdef _WIN32
51#include <windows.h>
52#endif
53
/*
 * One dirty bitmap attached to a BlockDriverState; kept on the
 * bs->dirty_bitmaps list (initialized in bdrv_new()).
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;                    /* backing hbitmap holding the dirty state */
    QLIST_ENTRY(BdrvDirtyBitmap) list;  /* link in the owning bs's list */
};

/* Sentinel stored in CreateCo.ret while an emulated synchronous operation
 * is still in flight (see bdrv_create()). */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020061static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000062 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020063 BlockCompletionFunc *cb, void *opaque);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020064static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000065 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020066 BlockCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020067static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors,
69 QEMUIOVector *iov);
70static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010073static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
74 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000075 BdrvRequestFlags flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010076static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000078 BdrvRequestFlags flags);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020079static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
80 int64_t sector_num,
81 QEMUIOVector *qiov,
82 int nb_sectors,
83 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +020084 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020085 void *opaque,
86 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010087static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +010088static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +020089 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellardec530c82006-04-25 22:36:06 +000090
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010091static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000093
Benoît Canetdc364f42014-01-23 21:31:32 +010094static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
96
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010097static QLIST_HEAD(, BlockDriver) bdrv_drivers =
98 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000099
Markus Armbrustereb852012009-10-27 18:41:44 +0100100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
#ifdef _WIN32
/* Return non-zero if @filename begins with a drive letter followed by ':'
 * (e.g. "c:" or "C:foo"). */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

/* Return 1 if @filename names a whole drive or device: either exactly
 * "<letter>:", or a device path starting with "\\.\" or "//./". */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif
122
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800123/* throttling disk I/O limits */
Benoît Canetcc0681c2013-09-02 14:14:39 +0200124void bdrv_set_io_limits(BlockDriverState *bs,
125 ThrottleConfig *cfg)
126{
127 int i;
128
129 throttle_config(&bs->throttle_state, cfg);
130
131 for (i = 0; i < 2; i++) {
132 qemu_co_enter_next(&bs->throttled_reqs[i]);
133 }
134}
135
136/* this function drain all the throttled IOs */
137static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
138{
139 bool drained = false;
140 bool enabled = bs->io_limits_enabled;
141 int i;
142
143 bs->io_limits_enabled = false;
144
145 for (i = 0; i < 2; i++) {
146 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
147 drained = true;
148 }
149 }
150
151 bs->io_limits_enabled = enabled;
152
153 return drained;
154}
155
/*
 * Disable I/O throttling on @bs: restart everything still sitting in the
 * throttled-request queues, then destroy the throttle state.  The queues
 * are drained before throttle_destroy() runs.
 */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}
164
/* Throttle timer callback (registered in bdrv_io_limits_enable()):
 * restart one coroutine waiting in the read queue (index 0). */
static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}
170
/* Throttle timer callback (registered in bdrv_io_limits_enable()):
 * restart one coroutine waiting in the write queue (index 1). */
static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}
176
/*
 * Enable I/O throttling on @bs.  Must not already be enabled (asserted).
 * Should be called before bdrv_set_io_limits() if a limit is set.
 */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    /* throttle state is driven by the virtual clock through the two
     * timer callbacks above */
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}
189
/* Throttling gatekeeper: make an I/O request wait if the limits require it.
 *
 * @bytes:    size of the I/O in bytes, charged against the limits
 * @is_write: selects the write (true) or read (false) throttle bucket
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* must this I/O wait for the throttle timer? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or requests of this type are already queued, queue
     * this one too so ordering is preserved */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the I/O will be executed now; account for it */
    throttle_account(&bs->throttle_state, is_write, bytes);


    /* if the next request must wait -> do nothing, the timer wakes it */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else kick the next queued request of this type */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}
220
Kevin Wolf339064d2013-11-28 10:23:32 +0100221size_t bdrv_opt_mem_align(BlockDriverState *bs)
222{
223 if (!bs || !bs->drv) {
224 /* 4k should be on the safe side */
225 return 4096;
226 }
227
228 return bs->bl.opt_mem_alignment;
229}
230
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *sep;

#ifdef _WIN32
    /* drive letters ("c:...") are paths, not protocols */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    sep = path + strcspn(path, ":/\\");
#else
    /* a ':' only counts if it appears before any path separator */
    sep = path + strcspn(path, ":/");
#endif

    return sep[0] == ':';
}
248
/* Return 1 if @path is absolute for the host's path conventions. */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return path[0] == '/' || path[0] == '\\';
#else
    return path[0] == '/';
#endif
}
261
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *last_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* skip a "protocol:" prefix in base_path, if present */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    /* find the character just past the last directory separator */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!last_sep || bslash > last_sep) {
            last_sep = bslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;

    /* keep whichever prefix reaches further into base_path */
    if (last_sep > prefix_end) {
        prefix_end = last_sep;
    }

    /* copy the (possibly clamped) directory part, then append filename */
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
305
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200306void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
307{
308 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
309 pstrcpy(dest, sz, bs->backing_file);
310 } else {
311 path_combine(dest, sz, bs->filename, bs->backing_file);
312 }
313}
314
/*
 * Register a block driver on the global bdrv_drivers list.
 *
 * Drivers only need to provide the coroutine read/write interface; any
 * missing coroutine or AIO callbacks are filled in with the emulation
 * wrappers declared at the top of this file.
 */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellardb3380822004-03-14 21:38:54 +0000334
Markus Armbruster7f06d472014-10-07 13:59:12 +0200335BlockDriverState *bdrv_new_root(void)
bellardfc01f7e2003-06-30 10:03:06 +0000336{
Markus Armbruster7f06d472014-10-07 13:59:12 +0200337 BlockDriverState *bs = bdrv_new();
Markus Armbrustere4e99862014-10-07 13:59:03 +0200338
Markus Armbrustere4e99862014-10-07 13:59:03 +0200339 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
Markus Armbrustere4e99862014-10-07 13:59:03 +0200340 return bs;
341}
342
/*
 * Allocate and minimally initialize a BlockDriverState.
 *
 * The new BDS starts with refcount 1, the main-loop AioContext, empty
 * notifier/op-blocker/dirty-bitmap lists and both throttled-request
 * queues initialized.  It is NOT inserted into the global bdrv_states
 * list; use bdrv_new_root() for that.
 */
BlockDriverState *bdrv_new(void)
{
    BlockDriverState *bs;
    int i;

    bs = g_new0(BlockDriverState, 1);
    QLIST_INIT(&bs->dirty_bitmaps);
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();

    return bs;
}
363
/* Add @notify to @bs's close_notifiers list (fired from the close path). */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}
368
bellardea2384d2004-08-01 21:59:26 +0000369BlockDriver *bdrv_find_format(const char *format_name)
370{
371 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100372 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
373 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000374 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100375 }
bellardea2384d2004-08-01 21:59:26 +0000376 }
377 return NULL;
378}
379
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800380static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100381{
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800382 static const char *whitelist_rw[] = {
383 CONFIG_BDRV_RW_WHITELIST
384 };
385 static const char *whitelist_ro[] = {
386 CONFIG_BDRV_RO_WHITELIST
Markus Armbrustereb852012009-10-27 18:41:44 +0100387 };
388 const char **p;
389
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800390 if (!whitelist_rw[0] && !whitelist_ro[0]) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100391 return 1; /* no whitelist, anything goes */
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800392 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100393
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800394 for (p = whitelist_rw; *p; p++) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100395 if (!strcmp(drv->format_name, *p)) {
396 return 1;
397 }
398 }
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800399 if (read_only) {
400 for (p = whitelist_ro; *p; p++) {
401 if (!strcmp(drv->format_name, *p)) {
402 return 1;
403 }
404 }
405 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100406 return 0;
407}
408
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800409BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
410 bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100411{
412 BlockDriver *drv = bdrv_find_format(format_name);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800413 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
Markus Armbrustereb852012009-10-27 18:41:44 +0100414}
415
/*
 * State shared between bdrv_create() and the image-creation coroutine.
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* strdup'd copy; freed by bdrv_create() */
    QemuOpts *opts;     /* creation options; not freed here */
    int ret;            /* NOT_DONE until the coroutine stores its result */
    Error *err;         /* error reported by the driver, if any */
} CreateCo;
423
/* Coroutine entry point for bdrv_create(): run the driver's bdrv_create
 * callback and publish result/error through the shared CreateCo. */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    /* flips cco->ret away from NOT_DONE; observed by the poll loop in
     * bdrv_create() */
    cco->ret = ret;
}
438
/*
 * Create an image file using driver @drv with options @opts.
 *
 * The driver's bdrv_create callback runs in a coroutine; when called
 * outside coroutine context the main-loop AioContext is polled until the
 * coroutine stores its result (cco.ret leaves NOT_DONE).
 *
 * Returns 0 on success, a negative errno on failure (with *errp set).
 */
int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        /* poll the main-loop context until the coroutine finishes */
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    /* cco.filename was strdup'd above; freed on every path */
    g_free(cco.filename);
    return ret;
}
483
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800484int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200485{
486 BlockDriver *drv;
Max Reitzcc84d902013-09-06 17:14:26 +0200487 Error *local_err = NULL;
488 int ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200489
Kevin Wolf98289622013-07-10 15:47:39 +0200490 drv = bdrv_find_protocol(filename, true);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200491 if (drv == NULL) {
Max Reitzcc84d902013-09-06 17:14:26 +0200492 error_setg(errp, "Could not find protocol for file '%s'", filename);
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000493 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200494 }
495
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800496 ret = bdrv_create(drv, filename, opts, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100497 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200498 error_propagate(errp, local_err);
499 }
500 return ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200501}
502
/*
 * Recompute the I/O limits and alignment hints (bs->bl) for @bs.
 *
 * Child limits (bs->file and bs->backing_hd) are refreshed recursively
 * and merged first; the driver's own bdrv_refresh_limits callback may
 * then override the result.  On child failure *errp is set and bs->bl
 * is left zeroed/partially filled.
 */
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        /* no protocol child: fall back to a 512-byte alignment */
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        /* merge: optimum values grow, maxima shrink (ignoring zeros) */
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.max_transfer_length =
            MIN_NON_ZERO(bs->bl.max_transfer_length,
                         bs->backing_hd->bl.max_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}
550
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    const char *tmpdir = getenv("TMPDIR");
    int fd;

    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    /* refuse names that would be silently truncated */
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) == 0) {
        return 0;
    }
    /* close failed: do not leave the file behind */
    unlink(filename);
    return -errno;
#endif
}
bellardea2384d2004-08-01 21:59:26 +0000586
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200587/*
588 * Detect host devices. By convention, /dev/cdrom[N] is always
589 * recognized as a host CDROM.
590 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200591static BlockDriver *find_hdev_driver(const char *filename)
592{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200593 int score_max = 0, score;
594 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200595
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100596 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200597 if (d->bdrv_probe_device) {
598 score = d->bdrv_probe_device(filename);
599 if (score > score_max) {
600 score_max = score;
601 drv = d;
602 }
603 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200604 }
605
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200606 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200607}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200608
Kevin Wolf98289622013-07-10 15:47:39 +0200609BlockDriver *bdrv_find_protocol(const char *filename,
610 bool allow_protocol_prefix)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200611{
612 BlockDriver *drv1;
613 char protocol[128];
614 int len;
615 const char *p;
616
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200617 /* TODO Drivers without bdrv_file_open must be specified explicitly */
618
Christoph Hellwig39508e72010-06-23 12:25:17 +0200619 /*
620 * XXX(hch): we really should not let host device detection
621 * override an explicit protocol specification, but moving this
622 * later breaks access to device names with colons in them.
623 * Thanks to the brain-dead persistent naming schemes on udev-
624 * based Linux systems those actually are quite common.
625 */
626 drv1 = find_hdev_driver(filename);
627 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200628 return drv1;
629 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200630
Kevin Wolf98289622013-07-10 15:47:39 +0200631 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200632 return bdrv_find_format("file");
633 }
Kevin Wolf98289622013-07-10 15:47:39 +0200634
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000635 p = strchr(filename, ':');
636 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200637 len = p - filename;
638 if (len > sizeof(protocol) - 1)
639 len = sizeof(protocol) - 1;
640 memcpy(protocol, filename, len);
641 protocol[len] = '\0';
642 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
643 if (drv1->protocol_name &&
644 !strcmp(drv1->protocol_name, protocol)) {
645 return drv1;
646 }
647 }
648 return NULL;
649}
650
/*
 * Probe the image content to find the most suitable format driver.
 *
 * scsi-generic devices and empty/ejected drives always get the "raw"
 * driver.  Otherwise the first 2048 bytes are read and every driver's
 * bdrv_probe callback scores them; the best scorer wins.
 *
 * On success *pdrv is set and 0 is returned; on failure *pdrv is NULL
 * and a negative errno is returned with *errp set.
 */
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            error_setg(errp, "Could not find raw image format");
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    /* ret now holds the number of bytes actually read; pass it to probe */
    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
697
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100698/**
699 * Set the current 'total_sectors' value
Markus Armbruster65a9bb22014-06-26 13:23:17 +0200700 * Return 0 on success, -errno on error.
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100701 */
702static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
703{
704 BlockDriver *drv = bs->drv;
705
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700706 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
707 if (bs->sg)
708 return 0;
709
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100710 /* query actual device if possible, otherwise just trust the hint */
711 if (drv->bdrv_getlength) {
712 int64_t length = drv->bdrv_getlength(bs);
713 if (length < 0) {
714 return length;
715 }
Fam Zheng7e382002013-11-06 19:48:06 +0800716 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100717 }
718
719 bs->total_sectors = hint;
720 return 0;
721}
722
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100723/**
Paolo Bonzini9e8f1832013-02-08 14:06:11 +0100724 * Set open flags for a given discard mode
725 *
726 * Return 0 on success, -1 if the discard mode was invalid.
727 */
728int bdrv_parse_discard_flags(const char *mode, int *flags)
729{
730 *flags &= ~BDRV_O_UNMAP;
731
732 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
733 /* do nothing */
734 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
735 *flags |= BDRV_O_UNMAP;
736 } else {
737 return -1;
738 }
739
740 return 0;
741}
742
743/**
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100744 * Set open flags for a given cache mode
745 *
746 * Return 0 on success, -1 if the cache mode was invalid.
747 */
748int bdrv_parse_cache_flags(const char *mode, int *flags)
749{
750 *flags &= ~BDRV_O_CACHE_MASK;
751
752 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
753 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100754 } else if (!strcmp(mode, "directsync")) {
755 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100756 } else if (!strcmp(mode, "writeback")) {
757 *flags |= BDRV_O_CACHE_WB;
758 } else if (!strcmp(mode, "unsafe")) {
759 *flags |= BDRV_O_CACHE_WB;
760 *flags |= BDRV_O_NO_FLUSH;
761 } else if (!strcmp(mode, "writethrough")) {
762 /* this is the default */
763 } else {
764 return -1;
765 }
766
767 return 0;
768}
769
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000770/**
771 * The copy-on-read flag is actually a reference count so multiple users may
772 * use the feature without worrying about clobbering its previous state.
773 * Copy-on-read stays enabled until all users have called to disable it.
774 */
775void bdrv_enable_copy_on_read(BlockDriverState *bs)
776{
777 bs->copy_on_read++;
778}
779
780void bdrv_disable_copy_on_read(BlockDriverState *bs)
781{
782 assert(bs->copy_on_read > 0);
783 bs->copy_on_read--;
784}
785
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200786/*
Kevin Wolfb1e6fc02014-05-06 12:11:42 +0200787 * Returns the flags that a temporary snapshot should get, based on the
788 * originally requested flags (the originally requested image will have flags
789 * like a backing file)
790 */
791static int bdrv_temp_snapshot_flags(int flags)
792{
793 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
794}
795
796/*
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200797 * Returns the flags that bs->file should get, based on the given flags for
798 * the parent BDS
799 */
800static int bdrv_inherited_flags(int flags)
801{
802 /* Enable protocol handling, disable format probing for bs->file */
803 flags |= BDRV_O_PROTOCOL;
804
805 /* Our block drivers take care to send flushes and respect unmap policy,
806 * so we can enable both unconditionally on lower layers. */
807 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
808
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200809 /* Clear flags that only apply to the top layer */
Kevin Wolf5669b442014-04-11 21:36:45 +0200810 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200811
812 return flags;
813}
814
Kevin Wolf317fc442014-04-25 13:27:34 +0200815/*
816 * Returns the flags that bs->backing_hd should get, based on the given flags
817 * for the parent BDS
818 */
819static int bdrv_backing_flags(int flags)
820{
821 /* backing files always opened read-only */
822 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
823
824 /* snapshot=on is handled on the top layer */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200825 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
Kevin Wolf317fc442014-04-25 13:27:34 +0200826
827 return flags;
828}
829
Kevin Wolf7b272452012-11-12 17:05:39 +0100830static int bdrv_open_flags(BlockDriverState *bs, int flags)
831{
832 int open_flags = flags | BDRV_O_CACHE_WB;
833
834 /*
835 * Clear flags that are internal to the block layer before opening the
836 * image.
837 */
Kevin Wolf20cca272014-06-04 14:33:27 +0200838 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
Kevin Wolf7b272452012-11-12 17:05:39 +0100839
840 /*
841 * Snapshots should be writable.
842 */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200843 if (flags & BDRV_O_TEMPORARY) {
Kevin Wolf7b272452012-11-12 17:05:39 +0100844 open_flags |= BDRV_O_RDWR;
845 }
846
847 return open_flags;
848}
849
Kevin Wolf636ea372014-01-24 14:11:52 +0100850static void bdrv_assign_node_name(BlockDriverState *bs,
851 const char *node_name,
852 Error **errp)
Benoît Canet6913c0c2014-01-23 21:31:33 +0100853{
854 if (!node_name) {
Kevin Wolf636ea372014-01-24 14:11:52 +0100855 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100856 }
857
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200858 /* Check for empty string or invalid characters */
Markus Armbrusterf5bebbb2014-09-30 13:59:30 +0200859 if (!id_wellformed(node_name)) {
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200860 error_setg(errp, "Invalid node name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100861 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100862 }
863
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100864 /* takes care of avoiding namespaces collisions */
Markus Armbruster7f06d472014-10-07 13:59:12 +0200865 if (blk_by_name(node_name)) {
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100866 error_setg(errp, "node-name=%s is conflicting with a device id",
867 node_name);
Kevin Wolf636ea372014-01-24 14:11:52 +0100868 return;
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100869 }
870
Benoît Canet6913c0c2014-01-23 21:31:33 +0100871 /* takes care of avoiding duplicates node names */
872 if (bdrv_find_node(node_name)) {
873 error_setg(errp, "Duplicate node name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100874 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100875 }
876
877 /* copy node name into the bs and insert it into the graph list */
878 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
879 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
Benoît Canet6913c0c2014-01-23 21:31:33 +0100880}
881
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200882/*
Kevin Wolf57915332010-04-14 15:24:50 +0200883 * Common part for opening disk images and files
Kevin Wolfb6ad4912013-03-15 10:35:04 +0100884 *
885 * Removes all processed options from *options.
Kevin Wolf57915332010-04-14 15:24:50 +0200886 */
Kevin Wolff500a6d2012-11-12 17:35:27 +0100887static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
Max Reitz34b5d2c2013-09-05 14:45:29 +0200888 QDict *options, int flags, BlockDriver *drv, Error **errp)
Kevin Wolf57915332010-04-14 15:24:50 +0200889{
890 int ret, open_flags;
Kevin Wolf035fccd2013-04-09 14:34:19 +0200891 const char *filename;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100892 const char *node_name = NULL;
Max Reitz34b5d2c2013-09-05 14:45:29 +0200893 Error *local_err = NULL;
Kevin Wolf57915332010-04-14 15:24:50 +0200894
895 assert(drv != NULL);
Paolo Bonzini64058752012-05-08 16:51:49 +0200896 assert(bs->file == NULL);
Kevin Wolf707ff822013-03-06 12:20:31 +0100897 assert(options != NULL && bs->options != options);
Kevin Wolf57915332010-04-14 15:24:50 +0200898
Kevin Wolf45673672013-04-22 17:48:40 +0200899 if (file != NULL) {
900 filename = file->filename;
901 } else {
902 filename = qdict_get_try_str(options, "filename");
903 }
904
Kevin Wolf765003d2014-02-03 14:49:42 +0100905 if (drv->bdrv_needs_filename && !filename) {
906 error_setg(errp, "The '%s' block driver requires a file name",
907 drv->format_name);
908 return -EINVAL;
909 }
910
Kevin Wolf45673672013-04-22 17:48:40 +0200911 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100912
Benoît Canet6913c0c2014-01-23 21:31:33 +0100913 node_name = qdict_get_try_str(options, "node-name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100914 bdrv_assign_node_name(bs, node_name, &local_err);
Markus Armbruster0fb63952014-04-25 16:50:31 +0200915 if (local_err) {
Kevin Wolf636ea372014-01-24 14:11:52 +0100916 error_propagate(errp, local_err);
917 return -EINVAL;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100918 }
919 qdict_del(options, "node-name");
920
Kevin Wolf5d186eb2013-03-27 17:28:18 +0100921 /* bdrv_open() with directly using a protocol as drv. This layer is already
922 * opened, so assign it to bs (while file becomes a closed BlockDriverState)
923 * and return immediately. */
924 if (file != NULL && drv->bdrv_file_open) {
925 bdrv_swap(file, bs);
926 return 0;
927 }
928
Kevin Wolf57915332010-04-14 15:24:50 +0200929 bs->open_flags = flags;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +0100930 bs->guest_block_size = 512;
Paolo Bonzinic25f53b2011-11-29 12:42:20 +0100931 bs->request_alignment = 512;
Asias He0d51b4d2013-08-22 15:24:14 +0800932 bs->zero_beyond_eof = true;
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800933 open_flags = bdrv_open_flags(bs, flags);
934 bs->read_only = !(open_flags & BDRV_O_RDWR);
Kevin Wolf20cca272014-06-04 14:33:27 +0200935 bs->growable = !!(flags & BDRV_O_PROTOCOL);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800936
937 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
Kevin Wolf8f94a6e2013-10-10 11:45:55 +0200938 error_setg(errp,
939 !bs->read_only && bdrv_is_whitelisted(drv, true)
940 ? "Driver '%s' can only be used for read-only devices"
941 : "Driver '%s' is not whitelisted",
942 drv->format_name);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800943 return -ENOTSUP;
944 }
Kevin Wolf57915332010-04-14 15:24:50 +0200945
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000946 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
Kevin Wolf0ebd24e2013-09-19 15:12:18 +0200947 if (flags & BDRV_O_COPY_ON_READ) {
948 if (!bs->read_only) {
949 bdrv_enable_copy_on_read(bs);
950 } else {
951 error_setg(errp, "Can't use copy-on-read on read-only device");
952 return -EINVAL;
953 }
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000954 }
955
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100956 if (filename != NULL) {
957 pstrcpy(bs->filename, sizeof(bs->filename), filename);
958 } else {
959 bs->filename[0] = '\0';
960 }
Max Reitz91af7012014-07-18 20:24:56 +0200961 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
Kevin Wolf57915332010-04-14 15:24:50 +0200962
Kevin Wolf57915332010-04-14 15:24:50 +0200963 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500964 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200965
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100966 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100967
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200968 /* Open the image, either directly or using a protocol */
969 if (drv->bdrv_file_open) {
Kevin Wolf5d186eb2013-03-27 17:28:18 +0100970 assert(file == NULL);
Benoît Canet030be322013-09-24 17:07:04 +0200971 assert(!drv->bdrv_needs_filename || filename != NULL);
Max Reitz34b5d2c2013-09-05 14:45:29 +0200972 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
Kevin Wolff500a6d2012-11-12 17:35:27 +0100973 } else {
Kevin Wolf2af5ef72013-04-09 13:19:18 +0200974 if (file == NULL) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200975 error_setg(errp, "Can't use '%s' as a block driver for the "
976 "protocol level", drv->format_name);
Kevin Wolf2af5ef72013-04-09 13:19:18 +0200977 ret = -EINVAL;
978 goto free_and_fail;
979 }
Kevin Wolff500a6d2012-11-12 17:35:27 +0100980 bs->file = file;
Max Reitz34b5d2c2013-09-05 14:45:29 +0200981 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200982 }
983
Kevin Wolf57915332010-04-14 15:24:50 +0200984 if (ret < 0) {
Markus Armbruster84d18f02014-01-30 15:07:28 +0100985 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200986 error_propagate(errp, local_err);
Dunrong Huang2fa9aa52013-09-24 18:14:01 +0800987 } else if (bs->filename[0]) {
988 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
Max Reitz34b5d2c2013-09-05 14:45:29 +0200989 } else {
990 error_setg_errno(errp, -ret, "Could not open image");
991 }
Kevin Wolf57915332010-04-14 15:24:50 +0200992 goto free_and_fail;
993 }
994
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100995 ret = refresh_total_sectors(bs, bs->total_sectors);
996 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200997 error_setg_errno(errp, -ret, "Could not refresh total sector count");
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100998 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200999 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01001000
Kevin Wolf3baca892014-07-16 17:48:16 +02001001 bdrv_refresh_limits(bs, &local_err);
1002 if (local_err) {
1003 error_propagate(errp, local_err);
1004 ret = -EINVAL;
1005 goto free_and_fail;
1006 }
1007
Paolo Bonzinic25f53b2011-11-29 12:42:20 +01001008 assert(bdrv_opt_mem_align(bs) != 0);
Kevin Wolf47ea2de2014-03-05 15:49:55 +01001009 assert((bs->request_alignment != 0) || bs->sg);
Kevin Wolf57915332010-04-14 15:24:50 +02001010 return 0;
1011
1012free_and_fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001013 bs->file = NULL;
Anthony Liguori7267c092011-08-20 22:09:37 -05001014 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +02001015 bs->opaque = NULL;
1016 bs->drv = NULL;
1017 return ret;
1018}
1019
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02001020static QDict *parse_json_filename(const char *filename, Error **errp)
1021{
1022 QObject *options_obj;
1023 QDict *options;
1024 int ret;
1025
1026 ret = strstart(filename, "json:", &filename);
1027 assert(ret);
1028
1029 options_obj = qobject_from_json(filename);
1030 if (!options_obj) {
1031 error_setg(errp, "Could not parse the JSON options");
1032 return NULL;
1033 }
1034
1035 if (qobject_type(options_obj) != QTYPE_QDICT) {
1036 qobject_decref(options_obj);
1037 error_setg(errp, "Invalid JSON object given");
1038 return NULL;
1039 }
1040
1041 options = qobject_to_qdict(options_obj);
1042 qdict_flatten(options);
1043
1044 return options;
1045}
1046
Kevin Wolf57915332010-04-14 15:24:50 +02001047/*
Kevin Wolff54120f2014-05-26 11:09:59 +02001048 * Fills in default options for opening images and converts the legacy
1049 * filename/flags pair to option QDict entries.
1050 */
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02001051static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
Kevin Wolf17b005f2014-05-27 10:50:29 +02001052 BlockDriver *drv, Error **errp)
Kevin Wolff54120f2014-05-26 11:09:59 +02001053{
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02001054 const char *filename = *pfilename;
Kevin Wolff54120f2014-05-26 11:09:59 +02001055 const char *drvname;
Kevin Wolf462f5bc2014-05-26 11:39:55 +02001056 bool protocol = flags & BDRV_O_PROTOCOL;
Kevin Wolff54120f2014-05-26 11:09:59 +02001057 bool parse_filename = false;
1058 Error *local_err = NULL;
Kevin Wolff54120f2014-05-26 11:09:59 +02001059
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02001060 /* Parse json: pseudo-protocol */
1061 if (filename && g_str_has_prefix(filename, "json:")) {
1062 QDict *json_options = parse_json_filename(filename, &local_err);
1063 if (local_err) {
1064 error_propagate(errp, local_err);
1065 return -EINVAL;
1066 }
1067
1068 /* Options given in the filename have lower priority than options
1069 * specified directly */
1070 qdict_join(*options, json_options, false);
1071 QDECREF(json_options);
1072 *pfilename = filename = NULL;
1073 }
1074
Kevin Wolff54120f2014-05-26 11:09:59 +02001075 /* Fetch the file name from the options QDict if necessary */
Kevin Wolf17b005f2014-05-27 10:50:29 +02001076 if (protocol && filename) {
Kevin Wolff54120f2014-05-26 11:09:59 +02001077 if (!qdict_haskey(*options, "filename")) {
1078 qdict_put(*options, "filename", qstring_from_str(filename));
1079 parse_filename = true;
1080 } else {
1081 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1082 "the same time");
1083 return -EINVAL;
1084 }
1085 }
1086
1087 /* Find the right block driver */
1088 filename = qdict_get_try_str(*options, "filename");
1089 drvname = qdict_get_try_str(*options, "driver");
1090
Kevin Wolf17b005f2014-05-27 10:50:29 +02001091 if (drv) {
1092 if (drvname) {
1093 error_setg(errp, "Driver specified twice");
1094 return -EINVAL;
1095 }
1096 drvname = drv->format_name;
1097 qdict_put(*options, "driver", qstring_from_str(drvname));
1098 } else {
1099 if (!drvname && protocol) {
1100 if (filename) {
1101 drv = bdrv_find_protocol(filename, parse_filename);
1102 if (!drv) {
1103 error_setg(errp, "Unknown protocol");
1104 return -EINVAL;
1105 }
1106
1107 drvname = drv->format_name;
1108 qdict_put(*options, "driver", qstring_from_str(drvname));
1109 } else {
1110 error_setg(errp, "Must specify either driver or file");
Kevin Wolff54120f2014-05-26 11:09:59 +02001111 return -EINVAL;
1112 }
Kevin Wolf17b005f2014-05-27 10:50:29 +02001113 } else if (drvname) {
1114 drv = bdrv_find_format(drvname);
1115 if (!drv) {
1116 error_setg(errp, "Unknown driver '%s'", drvname);
1117 return -ENOENT;
1118 }
Kevin Wolff54120f2014-05-26 11:09:59 +02001119 }
1120 }
1121
Kevin Wolf17b005f2014-05-27 10:50:29 +02001122 assert(drv || !protocol);
Kevin Wolff54120f2014-05-26 11:09:59 +02001123
1124 /* Driver-specific filename parsing */
Kevin Wolf17b005f2014-05-27 10:50:29 +02001125 if (drv && drv->bdrv_parse_filename && parse_filename) {
Kevin Wolff54120f2014-05-26 11:09:59 +02001126 drv->bdrv_parse_filename(filename, *options, &local_err);
1127 if (local_err) {
1128 error_propagate(errp, local_err);
1129 return -EINVAL;
1130 }
1131
1132 if (!drv->bdrv_needs_filename) {
1133 qdict_del(*options, "filename");
1134 }
1135 }
1136
1137 return 0;
1138}
1139
Fam Zheng8d24cce2014-05-23 21:29:45 +08001140void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1141{
1142
Fam Zheng826b6ca2014-05-23 21:29:47 +08001143 if (bs->backing_hd) {
1144 assert(bs->backing_blocker);
1145 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1146 } else if (backing_hd) {
1147 error_setg(&bs->backing_blocker,
1148 "device is used as backing hd of '%s'",
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001149 bdrv_get_device_name(bs));
Fam Zheng826b6ca2014-05-23 21:29:47 +08001150 }
1151
Fam Zheng8d24cce2014-05-23 21:29:45 +08001152 bs->backing_hd = backing_hd;
1153 if (!backing_hd) {
Fam Zheng826b6ca2014-05-23 21:29:47 +08001154 error_free(bs->backing_blocker);
1155 bs->backing_blocker = NULL;
Fam Zheng8d24cce2014-05-23 21:29:45 +08001156 goto out;
1157 }
1158 bs->open_flags &= ~BDRV_O_NO_BACKING;
1159 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1160 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1161 backing_hd->drv ? backing_hd->drv->format_name : "");
Fam Zheng826b6ca2014-05-23 21:29:47 +08001162
1163 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1164 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1165 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
1166 bs->backing_blocker);
Fam Zheng8d24cce2014-05-23 21:29:45 +08001167out:
Kevin Wolf3baca892014-07-16 17:48:16 +02001168 bdrv_refresh_limits(bs, NULL);
Fam Zheng8d24cce2014-05-23 21:29:45 +08001169}
1170
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001171/*
1172 * Opens the backing file for a BlockDriverState if not yet open
1173 *
1174 * options is a QDict of options to pass to the block drivers, or NULL for an
1175 * empty set of options. The reference to the QDict is transferred to this
1176 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file.
1178 */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001179int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
Paolo Bonzini9156df12012-10-18 16:49:17 +02001180{
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001181 char *backing_filename = g_malloc0(PATH_MAX);
Kevin Wolf317fc442014-04-25 13:27:34 +02001182 int ret = 0;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001183 BlockDriver *back_drv = NULL;
Fam Zheng8d24cce2014-05-23 21:29:45 +08001184 BlockDriverState *backing_hd;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001185 Error *local_err = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001186
1187 if (bs->backing_hd != NULL) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001188 QDECREF(options);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001189 goto free_exit;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001190 }
1191
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001192 /* NULL means an empty set of options */
1193 if (options == NULL) {
1194 options = qdict_new();
1195 }
1196
Paolo Bonzini9156df12012-10-18 16:49:17 +02001197 bs->open_flags &= ~BDRV_O_NO_BACKING;
Kevin Wolf1cb6f502013-04-12 20:27:07 +02001198 if (qdict_haskey(options, "file.filename")) {
1199 backing_filename[0] = '\0';
1200 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001201 QDECREF(options);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001202 goto free_exit;
Fam Zhengdbecebd2013-09-22 20:05:06 +08001203 } else {
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001204 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001205 }
1206
Kevin Wolf8ee79e72014-06-04 15:09:35 +02001207 if (!bs->drv || !bs->drv->supports_backing) {
1208 ret = -EINVAL;
1209 error_setg(errp, "Driver doesn't support backing files");
1210 QDECREF(options);
1211 goto free_exit;
1212 }
1213
Markus Armbrustere4e99862014-10-07 13:59:03 +02001214 backing_hd = bdrv_new();
Fam Zheng8d24cce2014-05-23 21:29:45 +08001215
Paolo Bonzini9156df12012-10-18 16:49:17 +02001216 if (bs->backing_format[0] != '\0') {
1217 back_drv = bdrv_find_format(bs->backing_format);
1218 }
1219
Max Reitzf67503e2014-02-18 18:33:05 +01001220 assert(bs->backing_hd == NULL);
Fam Zheng8d24cce2014-05-23 21:29:45 +08001221 ret = bdrv_open(&backing_hd,
Max Reitzddf56362014-02-18 18:33:06 +01001222 *backing_filename ? backing_filename : NULL, NULL, options,
Kevin Wolf317fc442014-04-25 13:27:34 +02001223 bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001224 if (ret < 0) {
Fam Zheng8d24cce2014-05-23 21:29:45 +08001225 bdrv_unref(backing_hd);
1226 backing_hd = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001227 bs->open_flags |= BDRV_O_NO_BACKING;
Fam Zhengb04b6b62013-11-08 11:26:49 +08001228 error_setg(errp, "Could not open backing file: %s",
1229 error_get_pretty(local_err));
1230 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001231 goto free_exit;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001232 }
Fam Zheng8d24cce2014-05-23 21:29:45 +08001233 bdrv_set_backing_hd(bs, backing_hd);
Peter Feinerd80ac652014-01-08 19:43:25 +00001234
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001235free_exit:
1236 g_free(backing_filename);
1237 return ret;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001238}
1239
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001240/*
Max Reitzda557aa2013-12-20 19:28:11 +01001241 * Opens a disk image whose options are given as BlockdevRef in another block
1242 * device's options.
1243 *
Max Reitzda557aa2013-12-20 19:28:11 +01001244 * If allow_none is true, no image will be opened if filename is false and no
1245 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1246 *
1247 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1248 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1249 * itself, all options starting with "${bdref_key}." are considered part of the
1250 * BlockdevRef.
1251 *
1252 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001253 *
1254 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001255 */
1256int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1257 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001258 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001259{
1260 QDict *image_options;
1261 int ret;
1262 char *bdref_key_dot;
1263 const char *reference;
1264
Max Reitzf67503e2014-02-18 18:33:05 +01001265 assert(pbs);
1266 assert(*pbs == NULL);
1267
Max Reitzda557aa2013-12-20 19:28:11 +01001268 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1269 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1270 g_free(bdref_key_dot);
1271
1272 reference = qdict_get_try_str(options, bdref_key);
1273 if (!filename && !reference && !qdict_size(image_options)) {
1274 if (allow_none) {
1275 ret = 0;
1276 } else {
1277 error_setg(errp, "A block device must be specified for \"%s\"",
1278 bdref_key);
1279 ret = -EINVAL;
1280 }
Markus Armbrusterb20e61e2014-05-28 11:16:57 +02001281 QDECREF(image_options);
Max Reitzda557aa2013-12-20 19:28:11 +01001282 goto done;
1283 }
1284
Max Reitzf7d9fd82014-02-18 18:33:12 +01001285 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001286
1287done:
1288 qdict_del(options, bdref_key);
1289 return ret;
1290}
1291
Chen Gang6b8aeca2014-06-23 23:28:23 +08001292int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001293{
1294 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001295 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001296 int64_t total_size;
1297 BlockDriver *bdrv_qcow2;
Chunyan Liu83d05212014-06-05 17:20:51 +08001298 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001299 QDict *snapshot_options;
1300 BlockDriverState *bs_snapshot;
1301 Error *local_err;
1302 int ret;
1303
1304 /* if snapshot, we create a temporary backing file and open it
1305 instead of opening 'filename' directly */
1306
1307 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001308 total_size = bdrv_getlength(bs);
1309 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001310 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001311 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001312 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001313 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001314
1315 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001316 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001317 if (ret < 0) {
1318 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001319 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001320 }
1321
1322 bdrv_qcow2 = bdrv_find_format("qcow2");
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001323 opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
1324 &error_abort);
Chunyan Liu83d05212014-06-05 17:20:51 +08001325 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001326 ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001327 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001328 if (ret < 0) {
1329 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1330 "'%s': %s", tmp_filename,
1331 error_get_pretty(local_err));
1332 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001333 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001334 }
1335
1336 /* Prepare a new options QDict for the temporary file */
1337 snapshot_options = qdict_new();
1338 qdict_put(snapshot_options, "file.driver",
1339 qstring_from_str("file"));
1340 qdict_put(snapshot_options, "file.filename",
1341 qstring_from_str(tmp_filename));
1342
Markus Armbrustere4e99862014-10-07 13:59:03 +02001343 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001344
1345 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001346 flags, bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001347 if (ret < 0) {
1348 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001349 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001350 }
1351
1352 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001353
1354out:
1355 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001356 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001357}
1358
Max Reitzda557aa2013-12-20 19:28:11 +01001359/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001360 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001361 *
1362 * options is a QDict of options to pass to the block drivers, or NULL for an
1363 * empty set of options. The reference to the QDict belongs to the block layer
1364 * after the call (even on failure), so if the caller intends to reuse the
1365 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001366 *
1367 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1368 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001369 *
1370 * The reference parameter may be used to specify an existing block device which
1371 * should be opened. If specified, neither options nor a filename may be given,
1372 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001373 */
Max Reitzddf56362014-02-18 18:33:06 +01001374int bdrv_open(BlockDriverState **pbs, const char *filename,
1375 const char *reference, QDict *options, int flags,
1376 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001377{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001378 int ret;
Max Reitzf67503e2014-02-18 18:33:05 +01001379 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001380 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001381 Error *local_err = NULL;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001382 int snapshot_flags = 0;
bellard712e7872005-04-28 21:09:32 +00001383
Max Reitzf67503e2014-02-18 18:33:05 +01001384 assert(pbs);
1385
Max Reitzddf56362014-02-18 18:33:06 +01001386 if (reference) {
1387 bool options_non_empty = options ? qdict_size(options) : false;
1388 QDECREF(options);
1389
1390 if (*pbs) {
1391 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1392 "another block device");
1393 return -EINVAL;
1394 }
1395
1396 if (filename || options_non_empty) {
1397 error_setg(errp, "Cannot reference an existing block device with "
1398 "additional options or a new filename");
1399 return -EINVAL;
1400 }
1401
1402 bs = bdrv_lookup_bs(reference, reference, errp);
1403 if (!bs) {
1404 return -ENODEV;
1405 }
1406 bdrv_ref(bs);
1407 *pbs = bs;
1408 return 0;
1409 }
1410
Max Reitzf67503e2014-02-18 18:33:05 +01001411 if (*pbs) {
1412 bs = *pbs;
1413 } else {
Markus Armbrustere4e99862014-10-07 13:59:03 +02001414 bs = bdrv_new();
Max Reitzf67503e2014-02-18 18:33:05 +01001415 }
1416
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001417 /* NULL means an empty set of options */
1418 if (options == NULL) {
1419 options = qdict_new();
1420 }
1421
Kevin Wolf17b005f2014-05-27 10:50:29 +02001422 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
Kevin Wolf462f5bc2014-05-26 11:39:55 +02001423 if (local_err) {
1424 goto fail;
1425 }
1426
Kevin Wolf76c591b2014-06-04 14:19:44 +02001427 /* Find the right image format driver */
1428 drv = NULL;
1429 drvname = qdict_get_try_str(options, "driver");
1430 if (drvname) {
1431 drv = bdrv_find_format(drvname);
1432 qdict_del(options, "driver");
1433 if (!drv) {
1434 error_setg(errp, "Unknown driver: '%s'", drvname);
1435 ret = -EINVAL;
1436 goto fail;
1437 }
1438 }
1439
1440 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1441 if (drv && !drv->bdrv_file_open) {
1442 /* If the user explicitly wants a format driver here, we'll need to add
1443 * another layer for the protocol in bs->file */
1444 flags &= ~BDRV_O_PROTOCOL;
1445 }
1446
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001447 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001448 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001449
Kevin Wolff4788ad2014-06-03 16:44:19 +02001450 /* Open image file without format layer */
1451 if ((flags & BDRV_O_PROTOCOL) == 0) {
1452 if (flags & BDRV_O_RDWR) {
1453 flags |= BDRV_O_ALLOW_RDWR;
1454 }
1455 if (flags & BDRV_O_SNAPSHOT) {
1456 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1457 flags = bdrv_backing_flags(flags);
1458 }
1459
1460 assert(file == NULL);
1461 ret = bdrv_open_image(&file, filename, options, "file",
1462 bdrv_inherited_flags(flags),
1463 true, &local_err);
1464 if (ret < 0) {
Max Reitz5469a2a2014-02-18 18:33:10 +01001465 goto fail;
1466 }
1467 }
1468
Kevin Wolf76c591b2014-06-04 14:19:44 +02001469 /* Image format probing */
1470 if (!drv && file) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001471 ret = find_image_format(file, filename, &drv, &local_err);
1472 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001473 goto fail;
Max Reitz2a05cbe2013-12-20 19:28:10 +01001474 }
Kevin Wolf76c591b2014-06-04 14:19:44 +02001475 } else if (!drv) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001476 error_setg(errp, "Must specify either driver or file");
1477 ret = -EINVAL;
Kevin Wolf8bfea152014-04-11 19:16:36 +02001478 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001479 }
1480
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001481 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001482 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001483 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001484 goto fail;
Christoph Hellwig69873072010-01-20 18:13:25 +01001485 }
1486
Max Reitz2a05cbe2013-12-20 19:28:10 +01001487 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001488 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001489 file = NULL;
1490 }
1491
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001492 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001493 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001494 QDict *backing_options;
1495
Benoît Canet5726d872013-09-25 13:30:01 +02001496 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001497 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001498 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001499 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001500 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001501 }
1502
Max Reitz91af7012014-07-18 20:24:56 +02001503 bdrv_refresh_filename(bs);
1504
Kevin Wolfb9988752014-04-03 12:09:34 +02001505 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1506 * temporary snapshot afterwards. */
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001507 if (snapshot_flags) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001508 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001509 if (local_err) {
Kevin Wolfb9988752014-04-03 12:09:34 +02001510 goto close_and_fail;
1511 }
1512 }
1513
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001514 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001515 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001516 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001517 if (flags & BDRV_O_PROTOCOL) {
1518 error_setg(errp, "Block protocol '%s' doesn't support the option "
1519 "'%s'", drv->format_name, entry->key);
1520 } else {
1521 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1522 "support the option '%s'", drv->format_name,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001523 bdrv_get_device_name(bs), entry->key);
Max Reitz5acd9d82014-02-18 18:33:11 +01001524 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001525
1526 ret = -EINVAL;
1527 goto close_and_fail;
1528 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001529
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001530 if (!bdrv_key_required(bs)) {
Markus Armbrustera7f53e22014-10-07 13:59:25 +02001531 if (bs->blk) {
1532 blk_dev_change_media_cb(bs->blk, true);
1533 }
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001534 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1535 && !runstate_check(RUN_STATE_INMIGRATE)
1536 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1537 error_setg(errp,
1538 "Guest must be stopped for opening of encrypted image");
1539 ret = -EBUSY;
1540 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001541 }
1542
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001543 QDECREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001544 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001545 return 0;
1546
Kevin Wolf8bfea152014-04-11 19:16:36 +02001547fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001548 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001549 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001550 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001551 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001552 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001553 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001554 if (!*pbs) {
1555 /* If *pbs is NULL, a new BDS has been created in this function and
1556 needs to be freed now. Otherwise, it does not need to be closed,
1557 since it has not really been opened yet. */
1558 bdrv_unref(bs);
1559 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001560 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001561 error_propagate(errp, local_err);
1562 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001563 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001564
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001565close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001566 /* See fail path, but now the BDS has to be always closed */
1567 if (*pbs) {
1568 bdrv_close(bs);
1569 } else {
1570 bdrv_unref(bs);
1571 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001572 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001573 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001574 error_propagate(errp, local_err);
1575 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001576 return ret;
1577}
1578
/* One element of a BlockReopenQueue: a single BDS whose flags are to be
 * changed as part of an atomic, transactional reopen of several devices. */
typedef struct BlockReopenQueueEntry {
    bool prepared;            /* set once bdrv_reopen_prepare() succeeded, so
                                 commit/abort must be invoked on this entry */
    BDRVReopenState state;    /* staged reopen parameters for this BDS */
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;  /* queue linkage */
} BlockReopenQueueEntry;
1584
1585/*
1586 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1587 * reopen of multiple devices.
1588 *
1589 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1590 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1591 * be created and initialized. This newly created BlockReopenQueue should be
1592 * passed back in for subsequent calls that are intended to be of the same
1593 * atomic 'set'.
1594 *
1595 * bs is the BlockDriverState to add to the reopen queue.
1596 *
1597 * flags contains the open flags for the associated bs
1598 *
1599 * returns a pointer to bs_queue, which is either the newly allocated
1600 * bs_queue, or the existing bs_queue being used.
1601 *
1602 */
1603BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1604 BlockDriverState *bs, int flags)
1605{
1606 assert(bs != NULL);
1607
1608 BlockReopenQueueEntry *bs_entry;
1609 if (bs_queue == NULL) {
1610 bs_queue = g_new0(BlockReopenQueue, 1);
1611 QSIMPLEQ_INIT(bs_queue);
1612 }
1613
Kevin Wolff1f25a22014-04-25 19:04:55 +02001614 /* bdrv_open() masks this flag out */
1615 flags &= ~BDRV_O_PROTOCOL;
1616
Jeff Codye971aa12012-09-20 15:13:19 -04001617 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001618 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001619 }
1620
1621 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1622 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1623
1624 bs_entry->state.bs = bs;
1625 bs_entry->state.flags = flags;
1626
1627 return bs_queue;
1628}
1629
1630/*
1631 * Reopen multiple BlockDriverStates atomically & transactionally.
1632 *
1633 * The queue passed in (bs_queue) must have been built up previous
1634 * via bdrv_reopen_queue().
1635 *
1636 * Reopens all BDS specified in the queue, with the appropriate
1637 * flags. All devices are prepared for reopen, and failure of any
1638 * device will cause all device changes to be abandonded, and intermediate
1639 * data cleaned up.
1640 *
1641 * If all devices prepare successfully, then the changes are committed
1642 * to all devices.
1643 *
1644 */
1645int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1646{
1647 int ret = -1;
1648 BlockReopenQueueEntry *bs_entry, *next;
1649 Error *local_err = NULL;
1650
1651 assert(bs_queue != NULL);
1652
1653 bdrv_drain_all();
1654
1655 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1656 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1657 error_propagate(errp, local_err);
1658 goto cleanup;
1659 }
1660 bs_entry->prepared = true;
1661 }
1662
1663 /* If we reach this point, we have success and just need to apply the
1664 * changes
1665 */
1666 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1667 bdrv_reopen_commit(&bs_entry->state);
1668 }
1669
1670 ret = 0;
1671
1672cleanup:
1673 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1674 if (ret && bs_entry->prepared) {
1675 bdrv_reopen_abort(&bs_entry->state);
1676 }
1677 g_free(bs_entry);
1678 }
1679 g_free(bs_queue);
1680 return ret;
1681}
1682
1683
1684/* Reopen a single BlockDriverState with the specified flags. */
1685int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1686{
1687 int ret = -1;
1688 Error *local_err = NULL;
1689 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1690
1691 ret = bdrv_reopen_multiple(queue, &local_err);
1692 if (local_err != NULL) {
1693 error_propagate(errp, local_err);
1694 }
1695 return ret;
1696}
1697
1698
1699/*
1700 * Prepares a BlockDriverState for reopen. All changes are staged in the
1701 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1702 * the block driver layer .bdrv_reopen_prepare()
1703 *
1704 * bs is the BlockDriverState to reopen
1705 * flags are the new open flags
1706 * queue is the reopen queue
1707 *
1708 * Returns 0 on success, non-zero on error. On error errp will be set
1709 * as well.
1710 *
1711 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1712 * It is the responsibility of the caller to then call the abort() or
1713 * commit() for any other BDS that have been left in a prepare() state
1714 *
1715 */
1716int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1717 Error **errp)
1718{
1719 int ret = -1;
1720 Error *local_err = NULL;
1721 BlockDriver *drv;
1722
1723 assert(reopen_state != NULL);
1724 assert(reopen_state->bs->drv != NULL);
1725 drv = reopen_state->bs->drv;
1726
1727 /* if we are to stay read-only, do not allow permission change
1728 * to r/w */
1729 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1730 reopen_state->flags & BDRV_O_RDWR) {
1731 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001732 bdrv_get_device_name(reopen_state->bs));
Jeff Codye971aa12012-09-20 15:13:19 -04001733 goto error;
1734 }
1735
1736
1737 ret = bdrv_flush(reopen_state->bs);
1738 if (ret) {
1739 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1740 strerror(-ret));
1741 goto error;
1742 }
1743
1744 if (drv->bdrv_reopen_prepare) {
1745 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1746 if (ret) {
1747 if (local_err != NULL) {
1748 error_propagate(errp, local_err);
1749 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001750 error_setg(errp, "failed while preparing to reopen image '%s'",
1751 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001752 }
1753 goto error;
1754 }
1755 } else {
1756 /* It is currently mandatory to have a bdrv_reopen_prepare()
1757 * handler for each supported drv. */
1758 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001759 drv->format_name, bdrv_get_device_name(reopen_state->bs),
Jeff Codye971aa12012-09-20 15:13:19 -04001760 "reopening of file");
1761 ret = -1;
1762 goto error;
1763 }
1764
1765 ret = 0;
1766
1767error:
1768 return ret;
1769}
1770
1771/*
1772 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1773 * makes them final by swapping the staging BlockDriverState contents into
1774 * the active BlockDriverState contents.
1775 */
1776void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1777{
1778 BlockDriver *drv;
1779
1780 assert(reopen_state != NULL);
1781 drv = reopen_state->bs->drv;
1782 assert(drv != NULL);
1783
1784 /* If there are any driver level actions to take */
1785 if (drv->bdrv_reopen_commit) {
1786 drv->bdrv_reopen_commit(reopen_state);
1787 }
1788
1789 /* set BDS specific flags now */
1790 reopen_state->bs->open_flags = reopen_state->flags;
1791 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1792 BDRV_O_CACHE_WB);
1793 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001794
Kevin Wolf3baca892014-07-16 17:48:16 +02001795 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001796}
1797
/*
 * Abort the reopen, and delete and free the staged changes in
 * reopen_state.  Safe to call after a failed or unapplied
 * bdrv_reopen_prepare(); only the driver callback, if present, is invoked.
 */
void bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    /* Give the driver a chance to discard its staged state */
    if (drv->bdrv_reopen_abort) {
        drv->bdrv_reopen_abort(reopen_state);
    }
}
1814
1815
/* Tear down a BlockDriverState: cancel its job, quiesce all I/O, close the
 * driver, release the backing and protocol files, and reset per-image state
 * so the BDS structure can be reused by a later open. */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    /* A running block job operates on this BDS; stop it synchronously */
    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        /* Detach and release the backing file before closing the driver */
        if (bs->backing_hd) {
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        /* Reset per-image fields for potential reuse of this BDS */
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->growable = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;

        /* Drop the reference to the protocol layer */
        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    /* Tell the attached guest device, if any, that the medium is gone */
    if (bs->blk) {
        blk_dev_change_media_cb(bs->blk, false);
    }

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }

    /* Free all registered AioContext-change notifiers */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}
1872
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001873void bdrv_close_all(void)
1874{
1875 BlockDriverState *bs;
1876
Benoît Canetdc364f42014-01-23 21:31:32 +01001877 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001878 AioContext *aio_context = bdrv_get_aio_context(bs);
1879
1880 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001881 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001882 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001883 }
1884}
1885
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001886/* Check if any requests are in-flight (including throttled requests) */
1887static bool bdrv_requests_pending(BlockDriverState *bs)
1888{
1889 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1890 return true;
1891 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001892 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1893 return true;
1894 }
1895 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001896 return true;
1897 }
1898 if (bs->file && bdrv_requests_pending(bs->file)) {
1899 return true;
1900 }
1901 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1902 return true;
1903 }
1904 return false;
1905}
1906
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete.  Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    /* Always run first iteration so any pending completion BHs run */
    bool busy = true;
    BlockDriverState *bs;

    /* Loop until a full pass over all devices finds no pending work */
    while (busy) {
        busy = false;

        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
            AioContext *aio_context = bdrv_get_aio_context(bs);
            bool bs_busy;

            aio_context_acquire(aio_context);
            /* Push queued requests down and restart throttled ones so they
             * can make progress and eventually complete */
            bdrv_flush_io_queue(bs);
            bdrv_start_throttled_reqs(bs);
            bs_busy = bdrv_requests_pending(bs);
            /* Poll this context; block only if requests are still pending */
            bs_busy |= aio_poll(aio_context, bs_busy);
            aio_context_release(aio_context);

            busy |= bs_busy;
        }
    }
}
1943
/* make a BlockDriverState anonymous by removing from bdrv_states and
 * graph_bdrv_states lists.
 * Also, NUL-terminate node_name to prevent double remove. */
void bdrv_make_anon(BlockDriverState *bs)
{
    /*
     * Take care to remove bs from bdrv_states only when it's actually
     * in it.  Note that bs->device_list.tqe_prev is initially null,
     * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
     * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
     * resetting it to null on remove.
     */
    if (bs->device_list.tqe_prev) {
        QTAILQ_REMOVE(&bdrv_states, bs, device_list);
        bs->device_list.tqe_prev = NULL;
    }
    /* A non-empty node_name means bs is also on the named-nodes list */
    if (bs->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
    }
    /* Clear the name so a second call won't try to remove again */
    bs->node_name[0] = '\0';
}
1965
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001966static void bdrv_rebind(BlockDriverState *bs)
1967{
1968 if (bs->drv && bs->drv->bdrv_rebind) {
1969 bs->drv->bdrv_rebind(bs);
1970 }
1971}
1972
/* Copy the fields that must stay attached to the guest device from bs_src
 * into bs_dest.  bdrv_swap() calls this after swapping the full structs to
 * move these fields back where they belong. */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->guest_block_size = bs_src->guest_block_size;
    bs_dest->copy_on_read = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o throttled req */
    memcpy(&bs_dest->throttle_state,
           &bs_src->throttle_state,
           sizeof(ThrottleState));
    bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
    bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
    bs_dest->io_limits_enabled = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error = bs_src->on_read_error;
    bs_dest->on_write_error = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
    bs_dest->iostatus = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;

    /* reference count */
    bs_dest->refcnt = bs_src->refcnt;

    /* job */
    bs_dest->job = bs_src->job;

    /* keep the same entry in bdrv_states */
    bs_dest->device_list = bs_src->device_list;
    bs_dest->blk = bs_src->blk;

    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
           sizeof(bs_dest->op_blockers));
}
2016
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be unattached and shouldn't have anything fancy enabled */
    assert(!bs_new->blk);
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* Swap the complete structs... */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new must remain unattached */
    assert(!bs_new->blk);

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    /* Give the drivers a chance to fix up pointers into the moved structs */
    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
2079
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    /* Swap contents: bs_top now holds the new image's state, bs_new the
     * old top's state, while the device-attached fields stay on bs_top */
    bdrv_swap(bs_new, bs_top);

    /* After the swap bs_new holds the former top of the chain; link it in
     * as the backing file of the new top */
    bdrv_set_backing_hd(bs_top, bs_new);
}
2099
/* Close and free a BlockDriverState.  The reference count must already have
 * dropped to zero, and no job or op blocker may still reference bs. */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    g_free(bs);
}
2114
aliguorie97fc192009-04-21 23:11:50 +00002115/*
2116 * Run consistency checks on an image
2117 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002118 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002119 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002120 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002121 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002122int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002123{
Max Reitz908bcd52014-08-07 22:47:55 +02002124 if (bs->drv == NULL) {
2125 return -ENOMEDIUM;
2126 }
aliguorie97fc192009-04-21 23:11:50 +00002127 if (bs->drv->bdrv_check == NULL) {
2128 return -ENOTSUP;
2129 }
2130
Kevin Wolfe076f332010-06-29 11:43:13 +02002131 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002132 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002133}
2134
Kevin Wolf8a426612010-07-16 17:17:01 +02002135#define COMMIT_BUF_SECTORS 2048
2136
bellard33e39632003-07-06 17:15:21 +00002137/* commit COW file into the raw image */
2138int bdrv_commit(BlockDriverState *bs)
2139{
bellard19cb3732006-08-19 11:45:59 +00002140 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002141 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002142 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002143 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002144 uint8_t *buf = NULL;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002145 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00002146
bellard19cb3732006-08-19 11:45:59 +00002147 if (!drv)
2148 return -ENOMEDIUM;
Liu Yuan6bb45152014-09-01 13:35:21 +08002149
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002150 if (!bs->backing_hd) {
2151 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002152 }
2153
Fam Zheng3718d8a2014-05-23 21:29:43 +08002154 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
2155 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002156 return -EBUSY;
2157 }
2158
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002159 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002160 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2161 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002162 open_flags = bs->backing_hd->open_flags;
2163
2164 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002165 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2166 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002167 }
bellard33e39632003-07-06 17:15:21 +00002168 }
bellardea2384d2004-08-01 21:59:26 +00002169
Jeff Cody72706ea2014-01-24 09:02:35 -05002170 length = bdrv_getlength(bs);
2171 if (length < 0) {
2172 ret = length;
2173 goto ro_cleanup;
2174 }
2175
2176 backing_length = bdrv_getlength(bs->backing_hd);
2177 if (backing_length < 0) {
2178 ret = backing_length;
2179 goto ro_cleanup;
2180 }
2181
2182 /* If our top snapshot is larger than the backing file image,
2183 * grow the backing file image if possible. If not possible,
2184 * we must return an error */
2185 if (length > backing_length) {
2186 ret = bdrv_truncate(bs->backing_hd, length);
2187 if (ret < 0) {
2188 goto ro_cleanup;
2189 }
2190 }
2191
2192 total_sectors = length >> BDRV_SECTOR_BITS;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002193
2194 /* qemu_try_blockalign() for bs will choose an alignment that works for
2195 * bs->backing_hd as well, so no need to compare the alignment manually. */
2196 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2197 if (buf == NULL) {
2198 ret = -ENOMEM;
2199 goto ro_cleanup;
2200 }
bellardea2384d2004-08-01 21:59:26 +00002201
Kevin Wolf8a426612010-07-16 17:17:01 +02002202 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002203 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2204 if (ret < 0) {
2205 goto ro_cleanup;
2206 }
2207 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002208 ret = bdrv_read(bs, sector, buf, n);
2209 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002210 goto ro_cleanup;
2211 }
2212
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002213 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2214 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002215 goto ro_cleanup;
2216 }
bellardea2384d2004-08-01 21:59:26 +00002217 }
2218 }
bellard95389c82005-12-18 18:28:15 +00002219
Christoph Hellwig1d449522010-01-17 12:32:30 +01002220 if (drv->bdrv_make_empty) {
2221 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002222 if (ret < 0) {
2223 goto ro_cleanup;
2224 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002225 bdrv_flush(bs);
2226 }
bellard95389c82005-12-18 18:28:15 +00002227
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002228 /*
2229 * Make sure all data we wrote to the backing device is actually
2230 * stable on disk.
2231 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002232 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002233 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002234 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002235
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002236 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002237ro_cleanup:
Kevin Wolf857d4f42014-05-20 13:16:51 +02002238 qemu_vfree(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002239
2240 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002241 /* ignoring error return here */
2242 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002243 }
2244
Christoph Hellwig1d449522010-01-17 12:32:30 +01002245 return ret;
bellard33e39632003-07-06 17:15:21 +00002246}
2247
/*
 * Synchronously commit every open image into its backing file.
 *
 * Walks the global list of BlockDriverStates and calls bdrv_commit() on
 * each device that has both a driver and a backing file.  The device's
 * AioContext is held across the commit so no other thread can process
 * requests for it concurrently.
 *
 * Returns 0 on success, or the first negative errno returned by
 * bdrv_commit() (remaining devices are not committed in that case).
 */
int bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        if (bs->drv && bs->backing_hd) {
            int ret = bdrv_commit(bs);
            if (ret < 0) {
                /* Release the lock before bailing out so the context is
                 * not left acquired on the error path. */
                aio_context_release(aio_context);
                return ret;
            }
        }
        aio_context_release(aio_context);
    }
    return 0;
}
2267
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 * It drops the request's contribution to bs->serialising_in_flight (if it
 * was marked serialising), unlinks it from the tracked_requests list, and
 * wakes every coroutine blocked in wait_serialising_requests() on it.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    if (req->serialising) {
        req->bs->serialising_in_flight--;
    }

    QLIST_REMOVE(req, list);
    /* Wake all waiters only after the request is no longer visible */
    qemu_co_queue_restart_all(&req->wait_queue);
}
2282
/**
 * Add an active request to the tracked requests list
 *
 * @req is initialised in place; the compound literal zeroes every field
 * not listed (notably waiting_for), so it must stay a single assignment.
 * The overlap range initially equals the request range itself and may be
 * widened later by mark_request_serialising().
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  unsigned int bytes, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
        .serialising = false,
        .overlap_offset = offset,
        .overlap_bytes = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}
2306
Kevin Wolfe96126f2014-02-08 10:42:18 +01002307static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002308{
Kevin Wolf73271452013-12-04 17:08:50 +01002309 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002310 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2311 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002312
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002313 if (!req->serialising) {
2314 req->bs->serialising_in_flight++;
2315 req->serialising = true;
2316 }
Kevin Wolf73271452013-12-04 17:08:50 +01002317
2318 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2319 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002320}
2321
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002322/**
2323 * Round a region to cluster boundaries
2324 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002325void bdrv_round_to_clusters(BlockDriverState *bs,
2326 int64_t sector_num, int nb_sectors,
2327 int64_t *cluster_sector_num,
2328 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002329{
2330 BlockDriverInfo bdi;
2331
2332 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2333 *cluster_sector_num = sector_num;
2334 *cluster_nb_sectors = nb_sectors;
2335 } else {
2336 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2337 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2338 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2339 nb_sectors, c);
2340 }
2341}
2342
Kevin Wolf73271452013-12-04 17:08:50 +01002343static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002344{
2345 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002346 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002347
Kevin Wolf73271452013-12-04 17:08:50 +01002348 ret = bdrv_get_info(bs, &bdi);
2349 if (ret < 0 || bdi.cluster_size == 0) {
2350 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002351 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002352 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002353 }
2354}
2355
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002356static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002357 int64_t offset, unsigned int bytes)
2358{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002359 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002360 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002361 return false;
2362 }
2363 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002364 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002365 return false;
2366 }
2367 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002368}
2369
/*
 * Block the calling coroutine until no tracked request that must be
 * serialised against @self overlaps it any more.
 *
 * After each wakeup the whole list is rescanned from the start, because
 * the list may have changed while we slept.  Returns true if we had to
 * wait at least once, false if we could proceed immediately.
 */
static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
    bool retry;
    bool waited = false;

    /* Fast path: no serialising request exists at all */
    if (!bs->serialising_in_flight) {
        return false;
    }

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            /* Only pairs where at least one side is serialising matter */
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
            }
            if (tracked_request_overlaps(req, self->overlap_offset,
                                         self->overlap_bytes))
            {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
                    qemu_co_queue_wait(&req->wait_queue);
                    self->waiting_for = NULL;
                    retry = true;
                    waited = true;
                    break;
                }
            }
        }
    } while (retry);

    return waited;
}
2413
Kevin Wolf756e6732010-01-12 12:55:17 +01002414/*
2415 * Return values:
2416 * 0 - success
2417 * -EINVAL - backing format specified, but no file
2418 * -ENOSPC - can't update the backing file because no space is left in the
2419 * image file header
2420 * -ENOTSUP - format driver doesn't support changing the backing file
2421 */
2422int bdrv_change_backing_file(BlockDriverState *bs,
2423 const char *backing_file, const char *backing_fmt)
2424{
2425 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002426 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002427
Paolo Bonzini5f377792012-04-12 14:01:01 +02002428 /* Backing file format doesn't make sense without a backing file */
2429 if (backing_fmt && !backing_file) {
2430 return -EINVAL;
2431 }
2432
Kevin Wolf756e6732010-01-12 12:55:17 +01002433 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002434 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002435 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002436 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002437 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002438
2439 if (ret == 0) {
2440 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2441 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2442 }
2443 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002444}
2445
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002446/*
2447 * Finds the image layer in the chain that has 'bs' as its backing file.
2448 *
2449 * active is the current topmost image.
2450 *
2451 * Returns NULL if bs is not found in active's image chain,
2452 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002453 *
2454 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002455 */
2456BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2457 BlockDriverState *bs)
2458{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002459 while (active && bs != active->backing_hd) {
2460 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002461 }
2462
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002463 return active;
2464}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002465
/* Given a BDS, searches for the base layer. */
BlockDriverState *bdrv_find_base(BlockDriverState *bs)
{
    /* With a NULL target, bdrv_find_overlay() walks the whole backing
     * chain and returns the bottommost image. */
    return bdrv_find_overlay(bs, NULL);
}
2471
/* List node used by bdrv_drop_intermediate() to remember the intermediate
 * BDSes that must be unreferenced once the chain has been relinked. */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
} BlkIntermediateStates;
2476
2477
/*
 * Drops images above 'base' up to and including 'top', and sets the image
 * above 'top' to have base as its backing file.
 *
 * Requires that the overlay to 'top' is opened r/w, so that the backing file
 * information in 'bs' can be properly updated.
 *
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * If backing_file_str is non-NULL, it will be used when modifying top's
 * overlay image metadata.
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 * Returns 0 on success, -EIO on any failure (lookup failure, header
 * update failure, or a broken chain).
 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base, const char *backing_file_str)
{
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
    BlkIntermediateStates *intermediate_state, *next;
    int ret = -EIO;

    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
    QSIMPLEQ_INIT(&states_to_delete);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    new_top_bs = bdrv_find_overlay(active, top);

    if (new_top_bs == NULL) {
        /* we could not find the image above 'top', this is an error */
        goto exit;
    }

    /* special case of new_top_bs->backing_hd already pointing to base - nothing
     * to do, no intermediate images */
    if (new_top_bs->backing_hd == base) {
        ret = 0;
        goto exit;
    }

    intermediate = top;

    /* now we will go down through the list, and add each BDS we find
     * into our deletion queue, until we hit the 'base'
     */
    while (intermediate) {
        intermediate_state = g_new0(BlkIntermediateStates, 1);
        intermediate_state->bs = intermediate;
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);

        if (intermediate->backing_hd == base) {
            base_bs = intermediate->backing_hd;
            break;
        }
        intermediate = intermediate->backing_hd;
    }
    if (base_bs == NULL) {
        /* something went wrong, we did not end at the base. safely
         * unravel everything, and exit with error */
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
    /* Update the on-disk header of the overlay first: if that fails the
     * chain is still intact. */
    backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        goto exit;
    }
    bdrv_set_backing_hd(new_top_bs, base_bs);

    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        /* so that bdrv_close() does not recursively close the chain */
        bdrv_set_backing_hd(intermediate_state->bs, NULL);
        bdrv_unref(intermediate_state->bs);
    }
    ret = 0;

exit:
    /* The bookkeeping nodes are freed on every path, success or failure */
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        g_free(intermediate_state);
    }
    return ret;
}
2581
2582
aliguori71d07702009-03-03 17:37:16 +00002583static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2584 size_t size)
2585{
2586 int64_t len;
2587
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002588 if (size > INT_MAX) {
2589 return -EIO;
2590 }
2591
aliguori71d07702009-03-03 17:37:16 +00002592 if (!bdrv_is_inserted(bs))
2593 return -ENOMEDIUM;
2594
2595 if (bs->growable)
2596 return 0;
2597
2598 len = bdrv_getlength(bs);
2599
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002600 if (offset < 0)
2601 return -EIO;
2602
2603 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00002604 return -EIO;
2605
2606 return 0;
2607}
2608
/*
 * Sector-granularity wrapper around bdrv_check_byte_request().
 * Rejects negative counts and counts whose byte size would not fit in an
 * int before converting to bytes.
 */
static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
        return -EIO;
    }

    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
2619
/* Parameter/result bundle used to run a synchronous read or write inside
 * a coroutine (see bdrv_rw_co_entry() and bdrv_prwv_co()). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t offset;         /* byte offset of the request */
    QEMUIOVector *qiov;     /* data buffers; qiov->size is the byte count */
    bool is_write;          /* true: pwritev path, false: preadv path */
    int ret;                /* NOT_DONE until the coroutine completes */
    BdrvRequestFlags flags;
} RwCo;
2628
2629static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2630{
2631 RwCo *rwco = opaque;
2632
2633 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002634 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2635 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002636 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002637 } else {
2638 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2639 rwco->qiov->size, rwco->qiov,
2640 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002641 }
2642}
2643
/*
 * Process a vectored synchronous request using coroutines
 *
 * If already running in coroutine context the request is executed inline;
 * otherwise a coroutine is spawned and the device's AioContext is polled
 * until it completes.  Returns the byte count/errno from the read or
 * write path via RwCo.ret.
 */
static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
        .flags = flags,
    };

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* Drive the event loop until the coroutine has stored a result */
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }
    return rwco.ret;
}
2686
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002687/*
2688 * Process a synchronous request using coroutines
2689 */
2690static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002691 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002692{
2693 QEMUIOVector qiov;
2694 struct iovec iov = {
2695 .iov_base = (void *)buf,
2696 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2697 };
2698
Kevin Wolfda15ee52014-04-14 15:39:36 +02002699 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2700 return -EINVAL;
2701 }
2702
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002703 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002704 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2705 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002706}
2707
/* Synchronous sector read. return < 0 if error. See bdrv_write() for the
 * return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}
2714
Markus Armbruster07d27a42012-06-29 17:34:29 +02002715/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2716int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2717 uint8_t *buf, int nb_sectors)
2718{
2719 bool enabled;
2720 int ret;
2721
2722 enabled = bs->io_limits_enabled;
2723 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002724 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002725 bs->io_limits_enabled = enabled;
2726 return ret;
2727}
2728
/* Synchronous sector write. Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* The cast is safe: the write path never modifies the buffer */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
2740
/* Synchronously write zeroes over nb_sectors starting at sector_num.
 * @flags is forwarded to the write path (e.g. BDRV_REQ_MAY_UNMAP).
 * Return codes as for bdrv_write(). */
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
                      int nb_sectors, BdrvRequestFlags flags)
{
    /* NULL buffer: BDRV_REQ_ZERO_WRITE tells the path to generate zeroes */
    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
                      BDRV_REQ_ZERO_WRITE | flags);
}
2747
Peter Lievend75cbb52013-10-24 12:07:03 +02002748/*
2749 * Completely zero out a block device with the help of bdrv_write_zeroes.
2750 * The operation is sped up by checking the block status and only writing
2751 * zeroes to the device if they currently do not return zeroes. Optional
2752 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2753 *
2754 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2755 */
2756int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2757{
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002758 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
Peter Lievend75cbb52013-10-24 12:07:03 +02002759 int n;
2760
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002761 target_sectors = bdrv_nb_sectors(bs);
2762 if (target_sectors < 0) {
2763 return target_sectors;
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002764 }
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002765
Peter Lievend75cbb52013-10-24 12:07:03 +02002766 for (;;) {
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002767 nb_sectors = target_sectors - sector_num;
Peter Lievend75cbb52013-10-24 12:07:03 +02002768 if (nb_sectors <= 0) {
2769 return 0;
2770 }
2771 if (nb_sectors > INT_MAX) {
2772 nb_sectors = INT_MAX;
2773 }
2774 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002775 if (ret < 0) {
2776 error_report("error getting block status at sector %" PRId64 ": %s",
2777 sector_num, strerror(-ret));
2778 return ret;
2779 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002780 if (ret & BDRV_BLOCK_ZERO) {
2781 sector_num += n;
2782 continue;
2783 }
2784 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2785 if (ret < 0) {
2786 error_report("error writing zeroes at sector %" PRId64 ": %s",
2787 sector_num, strerror(-ret));
2788 return ret;
2789 }
2790 sector_num += n;
2791 }
2792}
2793
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002794int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002795{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002796 QEMUIOVector qiov;
2797 struct iovec iov = {
2798 .iov_base = (void *)buf,
2799 .iov_len = bytes,
2800 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002801 int ret;
bellard83f64092006-08-01 16:21:11 +00002802
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002803 if (bytes < 0) {
2804 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002805 }
2806
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002807 qemu_iovec_init_external(&qiov, &iov, 1);
2808 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2809 if (ret < 0) {
2810 return ret;
bellard83f64092006-08-01 16:21:11 +00002811 }
2812
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002813 return bytes;
bellard83f64092006-08-01 16:21:11 +00002814}
2815
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002816int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002817{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002818 int ret;
bellard83f64092006-08-01 16:21:11 +00002819
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002820 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2821 if (ret < 0) {
2822 return ret;
bellard83f64092006-08-01 16:21:11 +00002823 }
2824
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002825 return qiov->size;
2826}
2827
2828int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002829 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002830{
2831 QEMUIOVector qiov;
2832 struct iovec iov = {
2833 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002834 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002835 };
2836
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002837 if (bytes < 0) {
2838 return -EINVAL;
2839 }
2840
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002841 qemu_iovec_init_external(&qiov, &iov, 1);
2842 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002843}
bellard83f64092006-08-01 16:21:11 +00002844
Kevin Wolff08145f2010-06-16 16:38:15 +02002845/*
2846 * Writes to the file and ensures that no writes are reordered across this
2847 * request (acts as a barrier)
2848 *
2849 * Returns 0 on success, -errno in error cases.
2850 */
2851int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2852 const void *buf, int count)
2853{
2854 int ret;
2855
2856 ret = bdrv_pwrite(bs, offset, buf, count);
2857 if (ret < 0) {
2858 return ret;
2859 }
2860
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002861 /* No flush needed for cache modes that already do it */
2862 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002863 bdrv_flush(bs);
2864 }
2865
2866 return 0;
2867}
2868
/*
 * Copy-on-read: read an entire cluster through a bounce buffer, write it
 * back into the top image, and copy the requested slice to @qiov.
 * Returns 0 on success or a negative errno from the read/write path.
 */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file. This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
                           &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
    if (bounce_buffer == NULL) {
        /* qemu_vfree(NULL) at err: is a safe no-op */
        ret = -ENOMEM;
        goto err;
    }

    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    /* If the cluster is all zeroes, prefer an efficient zero write when the
     * driver supports one. */
    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors, 0);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests. If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* Hand back only the slice the caller actually asked for */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
                        nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
2939
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002940/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002941 * Forwards an already correctly aligned request to the BlockDriver. This
2942 * handles copy on read and zeroing after EOF; any other features must be
2943 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002944 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002945static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01002946 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01002947 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02002948{
2949 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002950 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002951
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002952 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
2953 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002954
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002955 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
2956 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02002957 assert(!qiov || bytes == qiov->size);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002958
2959 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002960 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01002961 /* If we touch the same cluster it counts as an overlap. This
2962 * guarantees that allocating writes will be serialized and not race
2963 * with each other for the same cluster. For example, in copy-on-read
2964 * it ensures that the CoR read and write operations are atomic and
2965 * guest writes cannot interleave between them. */
2966 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002967 }
2968
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002969 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002970
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002971 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00002972 int pnum;
2973
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02002974 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002975 if (ret < 0) {
2976 goto out;
2977 }
2978
2979 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002980 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002981 goto out;
2982 }
2983 }
2984
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002985 /* Forward the request to the BlockDriver */
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002986 if (!(bs->zero_beyond_eof && bs->growable)) {
2987 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
2988 } else {
2989 /* Read zeros after EOF of growable BDSes */
Markus Armbruster40490822014-06-26 13:23:19 +02002990 int64_t total_sectors, max_nb_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002991
Markus Armbruster40490822014-06-26 13:23:19 +02002992 total_sectors = bdrv_nb_sectors(bs);
2993 if (total_sectors < 0) {
2994 ret = total_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002995 goto out;
2996 }
2997
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01002998 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
2999 align >> BDRV_SECTOR_BITS);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003000 if (max_nb_sectors > 0) {
Kevin Wolf33f461e2014-07-03 13:21:24 +02003001 QEMUIOVector local_qiov;
3002 size_t local_sectors;
3003
3004 max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
3005 local_sectors = MIN(max_nb_sectors, nb_sectors);
3006
3007 qemu_iovec_init(&local_qiov, qiov->niov);
3008 qemu_iovec_concat(&local_qiov, qiov, 0,
3009 local_sectors * BDRV_SECTOR_SIZE);
3010
3011 ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
3012 &local_qiov);
3013
3014 qemu_iovec_destroy(&local_qiov);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003015 } else {
3016 ret = 0;
3017 }
3018
3019 /* Reading beyond end of file is supposed to produce zeroes */
3020 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3021 uint64_t offset = MAX(0, total_sectors - sector_num);
3022 uint64_t bytes = (sector_num + nb_sectors - offset) *
3023 BDRV_SECTOR_SIZE;
3024 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3025 }
3026 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00003027
3028out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003029 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003030}
3031
/*
 * Handle a read request in coroutine context.
 *
 * Validates the request, applies I/O throttling, pads the request out to
 * the device's required alignment (by wrapping @qiov in a larger
 * local_qiov with scratch head/tail buffers), and forwards the aligned
 * request to bdrv_aligned_preadv() under request tracking.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_byte_request(bs, offset, bytes)) {
        return -EIO;
    }

    /* A copy-on-read device forces COR on every read */
    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, false);
    }

    /* Align read if necessary by padding qiov.  The head padding reads the
     * bytes between the aligned start and @offset into a scratch buffer
     * that is simply discarded afterwards. */
    if (offset & (align - 1)) {
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        /* Grow the request to start on the alignment boundary */
        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    /* Tail padding: extend the request up to the next alignment boundary,
     * again landing the extra bytes in a throwaway scratch buffer. */
    if ((offset + bytes) & (align - 1)) {
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    /* The request must be tracked for the whole aligned read */
    tracked_request_begin(&req, bs, offset, bytes, false);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}
3105
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003106static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3107 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3108 BdrvRequestFlags flags)
3109{
3110 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3111 return -EINVAL;
3112 }
3113
3114 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3115 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3116}
3117
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003118int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003119 int nb_sectors, QEMUIOVector *qiov)
3120{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003121 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003122
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003123 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3124}
3125
3126int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3127 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3128{
3129 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3130
3131 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3132 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003133}
3134
/* if no limit is specified in the BlockLimits use a default
 * of 32768 512-byte sectors (16 MiB) per request.
 */
#define MAX_WRITE_ZEROES_DEFAULT 32768

/*
 * Write zeroes to [sector_num, sector_num + nb_sectors), splitting the range
 * into driver-limit-sized, alignment-friendly chunks.  Each chunk first tries
 * the driver's efficient bdrv_co_write_zeroes callback and falls back to
 * writing a zeroed bounce buffer when the driver reports -ENOTSUP.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    /* iov doubles as the lazily-allocated, reusable bounce buffer */
    struct iovec iov = {0};
    int ret = 0;

    int max_write_zeroes = bs->bl.max_write_zeroes ?
                           bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;

    while (nb_sectors > 0 && !ret) {
        int num = nb_sectors;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned.
         */
        if (bs->bl.write_zeroes_alignment
            && num > bs->bl.write_zeroes_alignment) {
            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
                /* Make a small request up to the first aligned sector. */
                num = bs->bl.write_zeroes_alignment;
                num -= sector_num % bs->bl.write_zeroes_alignment;
            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
                /* Shorten the request to the last aligned sector. num cannot
                 * underflow because num > bs->bl.write_zeroes_alignment.
                 */
                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
            }
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_write_zeroes) {
            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            iov.iov_len = num * BDRV_SECTOR_SIZE;
            if (iov.iov_base == NULL) {
                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
                if (iov.iov_base == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);

            /* Keep the bounce buffer around only if it is big enough for
             * all future iterations; a smaller chunk means a new (smaller)
             * buffer would be allocated next time anyway.
             */
            if (num < max_write_zeroes) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        sector_num += num;
        nb_sectors -= num;
    }

fail:
    qemu_vfree(iov.iov_base);
    return ret;
}
3214
/*
 * Forwards an already correctly aligned write request to the BlockDriver.
 *
 * The caller must have begun the tracked request @req covering at least
 * [offset, offset + bytes); @offset and @bytes must be multiples of
 * BDRV_SECTOR_SIZE, and @qiov->size (if @qiov is non-NULL) must equal
 * @bytes.  Runs before-write notifiers, optional zero detection, the
 * actual driver write, and post-write bookkeeping (flush for
 * write-through, dirty bitmaps, accounting, size growth).
 *
 * Returns 0 on success, negative errno on failure.
 */
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* If we had to wait, the request must have been re-examined by the
     * caller; a serialising request cannot itself have waited. */
    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

    /* Optional zero detection: if the payload is all zeroes, convert the
     * write into a (possibly unmapping) write_zeroes operation. */
    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
        qemu_iovec_is_zero(qiov)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
    } else {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }
    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);

    /* cache=writethrough: make the data stable before reporting success */
    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
    }

    bdrv_set_dirty(bs, sector_num, nb_sectors);

    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);

    /* Growable devices may have been extended by this write */
    if (bs->growable && ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
    }

    return ret;
}
3274
/*
 * Handle a write request in coroutine context.
 *
 * Validates the request, applies I/O throttling, and — when @offset or
 * @bytes is not aligned to the device's requirement — performs a
 * read-modify-write (RMW) cycle: the unaligned head and/or tail is read
 * into scratch buffers, stitched together with @qiov into local_qiov, and
 * the whole aligned range is written via bdrv_aligned_pwritev().
 *
 * Returns 0 on success, negative errno on failure.
 */
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BdrvTrackedRequest req;
    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_byte_request(bs, offset, bytes)) {
        return -EIO;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, true);
    }

    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, true);

    if (offset & (align - 1)) {
        QEMUIOVector head_qiov;
        struct iovec head_iov;

        /* The RMW read and write must not race with overlapping requests */
        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base = head_buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        /* Prepend the preserved head bytes to the caller's data */
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        /* If we waited here, the head RMW (which built local_qiov) cannot
         * have happened — otherwise a racing request could have changed the
         * data we already read. */
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base = tail_buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                  align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        /* Append the preserved tail bytes after the caller's data */
        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
    }

    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

fail:
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);

    return ret;
}
3396
Kevin Wolf66015532013-12-03 14:40:18 +01003397static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3398 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3399 BdrvRequestFlags flags)
3400{
3401 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3402 return -EINVAL;
3403 }
3404
3405 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3406 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3407}
3408
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003409int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3410 int nb_sectors, QEMUIOVector *qiov)
3411{
3412 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3413
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003414 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3415}
3416
3417int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003418 int64_t sector_num, int nb_sectors,
3419 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003420{
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003421 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003422
Peter Lievend32f35c2013-10-24 12:06:52 +02003423 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3424 flags &= ~BDRV_REQ_MAY_UNMAP;
3425 }
3426
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003427 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003428 BDRV_REQ_ZERO_WRITE | flags);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003429}
3430
bellard83f64092006-08-01 16:21:11 +00003431/**
bellard83f64092006-08-01 16:21:11 +00003432 * Truncate file to 'offset' bytes (needed only for file protocols)
3433 */
3434int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3435{
3436 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003437 int ret;
bellard83f64092006-08-01 16:21:11 +00003438 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003439 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003440 if (!drv->bdrv_truncate)
3441 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003442 if (bs->read_only)
3443 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003444
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003445 ret = drv->bdrv_truncate(bs, offset);
3446 if (ret == 0) {
3447 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003448 if (bs->blk) {
3449 blk_dev_resize_cb(bs->blk);
3450 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003451 }
3452 return ret;
bellard83f64092006-08-01 16:21:11 +00003453}
3454
3455/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003456 * Length of a allocated file in bytes. Sparse files are counted by actual
3457 * allocated space. Return < 0 if error or unknown.
3458 */
3459int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3460{
3461 BlockDriver *drv = bs->drv;
3462 if (!drv) {
3463 return -ENOMEDIUM;
3464 }
3465 if (drv->bdrv_get_allocated_file_size) {
3466 return drv->bdrv_get_allocated_file_size(bs);
3467 }
3468 if (bs->file) {
3469 return bdrv_get_allocated_file_size(bs->file);
3470 }
3471 return -ENOTSUP;
3472}
3473
3474/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003475 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003476 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003477int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003478{
3479 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003480
bellard83f64092006-08-01 16:21:11 +00003481 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003482 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003483
Kevin Wolfb94a2612013-10-29 12:18:58 +01003484 if (drv->has_variable_length) {
3485 int ret = refresh_total_sectors(bs, bs->total_sectors);
3486 if (ret < 0) {
3487 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003488 }
bellard83f64092006-08-01 16:21:11 +00003489 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003490 return bs->total_sectors;
3491}
3492
3493/**
3494 * Return length in bytes on success, -errno on error.
3495 * The length is always a multiple of BDRV_SECTOR_SIZE.
3496 */
3497int64_t bdrv_getlength(BlockDriverState *bs)
3498{
3499 int64_t ret = bdrv_nb_sectors(bs);
3500
3501 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003502}
3503
bellard19cb3732006-08-19 11:45:59 +00003504/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003505void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003506{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003507 int64_t nb_sectors = bdrv_nb_sectors(bs);
3508
3509 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003510}
bellardcf989512004-02-16 21:56:36 +00003511
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003512void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3513 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003514{
3515 bs->on_read_error = on_read_error;
3516 bs->on_write_error = on_write_error;
3517}
3518
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003519BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003520{
3521 return is_read ? bs->on_read_error : bs->on_write_error;
3522}
3523
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003524BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3525{
3526 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3527
3528 switch (on_err) {
3529 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003530 return (error == ENOSPC) ?
3531 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003532 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003533 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003534 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003535 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003536 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003537 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003538 default:
3539 abort();
3540 }
3541}
3542
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003543static void send_qmp_error_event(BlockDriverState *bs,
3544 BlockErrorAction action,
3545 bool is_read, int error)
3546{
3547 BlockErrorAction ac;
3548
3549 ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3550 qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action,
3551 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003552 error == ENOSPC, strerror(error),
3553 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003554}
3555
/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 *
 * @action must come from bdrv_get_error_action(); @error is the positive
 * errno of the failed operation.  The ordering of the calls in the STOP
 * branch is deliberate and must not be changed (see comments below).
 */
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
                       bool is_read, int error)
{
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        bdrv_iostatus_set_err(bs, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects.  First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop.  In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(bs, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(bs, action, is_read, error);
    }
}
3587
/* Return non-zero if the device was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
3592
/* Return non-zero if the device is a SCSI generic (pass-through) device. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
3597
/* Return non-zero if the write cache is enabled (i.e. not writethrough). */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
3602
Paolo Bonzini425b0142012-06-06 00:04:52 +02003603void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3604{
3605 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003606
3607 /* so a reopen() will preserve wce */
3608 if (wce) {
3609 bs->open_flags |= BDRV_O_CACHE_WB;
3610 } else {
3611 bs->open_flags &= ~BDRV_O_CACHE_WB;
3612 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003613}
3614
bellardea2384d2004-08-01 21:59:26 +00003615int bdrv_is_encrypted(BlockDriverState *bs)
3616{
3617 if (bs->backing_hd && bs->backing_hd->encrypted)
3618 return 1;
3619 return bs->encrypted;
3620}
3621
aliguoric0f4ce72009-03-05 23:01:01 +00003622int bdrv_key_required(BlockDriverState *bs)
3623{
3624 BlockDriverState *backing_hd = bs->backing_hd;
3625
3626 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3627 return 1;
3628 return (bs->encrypted && !bs->valid_key);
3629}
3630
bellardea2384d2004-08-01 21:59:26 +00003631int bdrv_set_key(BlockDriverState *bs, const char *key)
3632{
3633 int ret;
3634 if (bs->backing_hd && bs->backing_hd->encrypted) {
3635 ret = bdrv_set_key(bs->backing_hd, key);
3636 if (ret < 0)
3637 return ret;
3638 if (!bs->encrypted)
3639 return 0;
3640 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003641 if (!bs->encrypted) {
3642 return -EINVAL;
3643 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3644 return -ENOMEDIUM;
3645 }
aliguoric0f4ce72009-03-05 23:01:01 +00003646 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003647 if (ret < 0) {
3648 bs->valid_key = 0;
3649 } else if (!bs->valid_key) {
3650 bs->valid_key = 1;
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003651 if (bs->blk) {
3652 /* call the change callback now, we skipped it on open */
3653 blk_dev_change_media_cb(bs->blk, true);
3654 }
aliguoribb5fc202009-03-05 23:01:15 +00003655 }
aliguoric0f4ce72009-03-05 23:01:01 +00003656 return ret;
bellardea2384d2004-08-01 21:59:26 +00003657}
3658
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003659const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003660{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003661 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003662}
3663
/* qsort() comparator for an array of 'const char *'.
 *
 * BUG FIX: qsort() passes pointers to the array *elements*, i.e.
 * 'const char **' here.  The old code did strcmp(a, b), which compared the
 * raw pointer bytes instead of the strings.  Dereference before comparing.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(const char * const *)a, *(const char * const *)b);
}
3668
ths5fafdf22007-09-16 21:08:06 +00003669void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003670 void *opaque)
3671{
3672 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003673 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003674 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003675 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003676
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003677 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003678 if (drv->format_name) {
3679 bool found = false;
3680 int i = count;
3681 while (formats && i && !found) {
3682 found = !strcmp(formats[--i], drv->format_name);
3683 }
3684
3685 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003686 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003687 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003688 }
3689 }
bellardea2384d2004-08-01 21:59:26 +00003690 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003691
3692 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3693
3694 for (i = 0; i < count; i++) {
3695 it(opaque, formats[i]);
3696 }
3697
Jeff Codye855e4f2014-04-28 18:29:54 -04003698 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003699}
3700
Benoît Canetdc364f42014-01-23 21:31:32 +01003701/* This function is to find block backend bs */
Markus Armbruster7f06d472014-10-07 13:59:12 +02003702/* TODO convert callers to blk_by_name(), then remove */
bellardb3380822004-03-14 21:38:54 +00003703BlockDriverState *bdrv_find(const char *name)
3704{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003705 BlockBackend *blk = blk_by_name(name);
bellardb3380822004-03-14 21:38:54 +00003706
Markus Armbruster7f06d472014-10-07 13:59:12 +02003707 return blk ? blk_bs(blk) : NULL;
bellardb3380822004-03-14 21:38:54 +00003708}
3709
Benoît Canetdc364f42014-01-23 21:31:32 +01003710/* This function is to find a node in the bs graph */
3711BlockDriverState *bdrv_find_node(const char *node_name)
3712{
3713 BlockDriverState *bs;
3714
3715 assert(node_name);
3716
3717 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3718 if (!strcmp(node_name, bs->node_name)) {
3719 return bs;
3720 }
3721 }
3722 return NULL;
3723}
3724
Benoît Canetc13163f2014-01-23 21:31:34 +01003725/* Put this QMP function here so it can access the static graph_bdrv_states. */
3726BlockDeviceInfoList *bdrv_named_nodes_list(void)
3727{
3728 BlockDeviceInfoList *list, *entry;
3729 BlockDriverState *bs;
3730
3731 list = NULL;
3732 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3733 entry = g_malloc0(sizeof(*entry));
3734 entry->value = bdrv_block_device_info(bs);
3735 entry->next = list;
3736 list = entry;
3737 }
3738
3739 return list;
3740}
3741
Benoît Canet12d3ba82014-01-23 21:31:35 +01003742BlockDriverState *bdrv_lookup_bs(const char *device,
3743 const char *node_name,
3744 Error **errp)
3745{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003746 BlockBackend *blk;
3747 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003748
Benoît Canet12d3ba82014-01-23 21:31:35 +01003749 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02003750 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003751
Markus Armbruster7f06d472014-10-07 13:59:12 +02003752 if (blk) {
3753 return blk_bs(blk);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003754 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003755 }
3756
Benoît Canetdd67fa52014-02-12 17:15:06 +01003757 if (node_name) {
3758 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003759
Benoît Canetdd67fa52014-02-12 17:15:06 +01003760 if (bs) {
3761 return bs;
3762 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003763 }
3764
Benoît Canetdd67fa52014-02-12 17:15:06 +01003765 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3766 device ? device : "",
3767 node_name ? node_name : "");
3768 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003769}
3770
Jeff Cody5a6684d2014-06-25 15:40:09 -04003771/* If 'base' is in the same chain as 'top', return true. Otherwise,
3772 * return false. If either argument is NULL, return false. */
3773bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3774{
3775 while (top && top != base) {
3776 top = top->backing_hd;
3777 }
3778
3779 return top != NULL;
3780}
3781
Markus Armbruster2f399b02010-06-02 18:55:20 +02003782BlockDriverState *bdrv_next(BlockDriverState *bs)
3783{
3784 if (!bs) {
3785 return QTAILQ_FIRST(&bdrv_states);
3786 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003787 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003788}
3789
Markus Armbruster7f06d472014-10-07 13:59:12 +02003790/* TODO check what callers really want: bs->node_name or blk_name() */
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003791const char *bdrv_get_device_name(const BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003792{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003793 return bs->blk ? blk_name(bs->blk) : "";
bellardea2384d2004-08-01 21:59:26 +00003794}
3795
Markus Armbrusterc8433282012-06-05 16:49:24 +02003796int bdrv_get_flags(BlockDriverState *bs)
3797{
3798 return bs->open_flags;
3799}
3800
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003801int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003802{
3803 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003804 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003805
Benoît Canetdc364f42014-01-23 21:31:32 +01003806 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003807 AioContext *aio_context = bdrv_get_aio_context(bs);
3808 int ret;
3809
3810 aio_context_acquire(aio_context);
3811 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003812 if (ret < 0 && !result) {
3813 result = ret;
3814 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003815 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003816 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003817
3818 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003819}
3820
/* Trivial bdrv_has_zero_init implementation that unconditionally reports 1.
 * Presumably intended for drivers whose new images always read as zeroes —
 * confirm against the drivers that reference it. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
3825
Kevin Wolff2feebb2010-04-14 17:30:35 +02003826int bdrv_has_zero_init(BlockDriverState *bs)
3827{
3828 assert(bs->drv);
3829
Paolo Bonzini11212d82013-09-04 19:00:27 +02003830 /* If BS is a copy on write image, it is initialized to
3831 the contents of the base image, which may not be zeroes. */
3832 if (bs->backing_hd) {
3833 return 0;
3834 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003835 if (bs->drv->bdrv_has_zero_init) {
3836 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003837 }
3838
Peter Lieven3ac21622013-06-28 12:47:42 +02003839 /* safe default */
3840 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02003841}
3842
Peter Lieven4ce78692013-10-24 12:06:54 +02003843bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3844{
3845 BlockDriverInfo bdi;
3846
3847 if (bs->backing_hd) {
3848 return false;
3849 }
3850
3851 if (bdrv_get_info(bs, &bdi) == 0) {
3852 return bdi.unallocated_blocks_are_zero;
3853 }
3854
3855 return false;
3856}
3857
3858bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3859{
3860 BlockDriverInfo bdi;
3861
3862 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3863 return false;
3864 }
3865
3866 if (bdrv_get_info(bs, &bdi) == 0) {
3867 return bdi.can_write_zeroes_with_unmap;
3868 }
3869
3870 return false;
3871}
3872
/* Argument/result bundle for running bdrv_co_get_block_status() in a
 * coroutine (see bdrv_get_block_status_co_entry()). */
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;       /* node to query */
    BlockDriverState *base;     /* unused by the status query itself */
    int64_t sector_num;         /* first sector of the query */
    int nb_sectors;             /* max sectors to report in *pnum */
    int *pnum;                  /* out: sectors in the same state */
    int64_t ret;                /* out: status bits or negative errno */
    bool done;                  /* set when the coroutine has finished */
} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003882
/*
 * Returns the allocation status of the specified sectors as BDRV_BLOCK_*
 * bits (negative errno on failure).  Drivers not implementing the
 * functionality are assumed to not support backing files, hence all their
 * sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
                                                     int64_t sector_num,
                                                     int nb_sectors, int *pnum)
{
    int64_t total_sectors;
    int64_t n;
    int64_t ret, ret2;

    total_sectors = bdrv_nb_sectors(bs);
    if (total_sectors < 0) {
        return total_sectors;
    }

    /* Query starts past EOF: nothing to report. */
    if (sector_num >= total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* Clamp the request so it does not run past EOF. */
    n = total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    /* Driver has no status hook: report everything as allocated data,
     * with a valid offset when the driver is a protocol driver. */
    if (!bs->drv->bdrv_co_get_block_status) {
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
        }
        return ret;
    }

    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret < 0) {
        *pnum = 0;
        return ret;
    }

    /* RAW means "same data lives at the given offset of bs->file":
     * forward the query there. */
    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                     *pnum, pnum);
    }

    /* Data or explicit zeroes both count as allocated in this layer. */
    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    }

    /* Unallocated: the range reads as zero if the driver guarantees it,
     * or if it lies entirely past the end of the backing file. */
    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing_hd) {
            BlockDriverState *bs2 = bs->backing_hd;
            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    /* For data with a known offset in bs->file, refine the answer with the
     * protocol layer's own status. */
    if (bs->file &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int file_pnum;

        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                        *pnum, &file_pnum);
        if (ret2 >= 0) {
            /* Ignore errors. This is just providing extra information, it
             * is useful but not necessary.
             */
            if (!file_pnum) {
                /* !file_pnum indicates an offset at or beyond the EOF; it is
                 * perfectly valid for the format block driver to point to such
                 * offsets, so catch it and mark everything as zero */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

    return ret;
}
3984
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003985/* Coroutine wrapper for bdrv_get_block_status() */
3986static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003987{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003988 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003989 BlockDriverState *bs = data->bs;
3990
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003991 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
3992 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003993 data->done = true;
3994}
3995
3996/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003997 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003998 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003999 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004000 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004001int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4002 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004003{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004004 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004005 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004006 .bs = bs,
4007 .sector_num = sector_num,
4008 .nb_sectors = nb_sectors,
4009 .pnum = pnum,
4010 .done = false,
4011 };
4012
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004013 if (qemu_in_coroutine()) {
4014 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004015 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004016 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004017 AioContext *aio_context = bdrv_get_aio_context(bs);
4018
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004019 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004020 qemu_coroutine_enter(co, &data);
4021 while (!data.done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004022 aio_poll(aio_context, true);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004023 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004024 }
4025 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00004026}
4027
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004028int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4029 int nb_sectors, int *pnum)
4030{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004031 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4032 if (ret < 0) {
4033 return ret;
4034 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004035 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004036}
4037
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (inclusive). BASE can be NULL to check if the given
 * sector is allocated in any image of the chain. Return false otherwise.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t sector_num,
                            int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    int ret, n = nb_sectors;

    /* Walk the chain from top towards (but excluding) base. */
    intermediate = top;
    while (intermediate && intermediate != base) {
        int pnum_inter;
        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
                                &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            /* Allocated in this layer: report its extent and stop. */
            *pnum = pnum_inter;
            return 1;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nr_sectors] allocated.
         */
        /* Shrink the reported unallocated run to what this layer proved,
         * except when the layer simply ended before the run did. */
        if (n > pnum_inter &&
            (intermediate == top ||
             sector_num + pnum_inter < intermediate->total_sectors)) {
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    *pnum = n;
    return 0;
}
4088
aliguori045df332009-03-05 23:00:48 +00004089const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4090{
4091 if (bs->backing_hd && bs->backing_hd->encrypted)
4092 return bs->backing_file;
4093 else if (bs->encrypted)
4094 return bs->filename;
4095 else
4096 return NULL;
4097}
4098
/* Copy the backing filename of @bs into @filename (truncating to
 * @filename_size, always NUL-terminated by pstrcpy). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
4104
ths5fafdf22007-09-16 21:08:06 +00004105int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00004106 const uint8_t *buf, int nb_sectors)
4107{
4108 BlockDriver *drv = bs->drv;
4109 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004110 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004111 if (!drv->bdrv_write_compressed)
4112 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02004113 if (bdrv_check_request(bs, sector_num, nb_sectors))
4114 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004115
Fam Zhenge4654d22013-11-13 18:29:43 +08004116 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004117
bellardfaea38e2006-08-05 21:31:00 +00004118 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4119}
ths3b46e622007-09-17 08:09:54 +00004120
bellardfaea38e2006-08-05 21:31:00 +00004121int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4122{
4123 BlockDriver *drv = bs->drv;
4124 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004125 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004126 if (!drv->bdrv_get_info)
4127 return -ENOTSUP;
4128 memset(bdi, 0, sizeof(*bdi));
4129 return drv->bdrv_get_info(bs, bdi);
4130}
4131
Max Reitzeae041f2013-10-09 10:46:16 +02004132ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4133{
4134 BlockDriver *drv = bs->drv;
4135 if (drv && drv->bdrv_get_specific_info) {
4136 return drv->bdrv_get_specific_info(bs);
4137 }
4138 return NULL;
4139}
4140
Christoph Hellwig45566e92009-07-10 23:11:57 +02004141int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4142 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004143{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004144 QEMUIOVector qiov;
4145 struct iovec iov = {
4146 .iov_base = (void *) buf,
4147 .iov_len = size,
4148 };
4149
4150 qemu_iovec_init_external(&qiov, &iov, 1);
4151 return bdrv_writev_vmstate(bs, &qiov, pos);
4152}
4153
4154int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4155{
aliguori178e08a2009-04-05 19:10:55 +00004156 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004157
4158 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004159 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004160 } else if (drv->bdrv_save_vmstate) {
4161 return drv->bdrv_save_vmstate(bs, qiov, pos);
4162 } else if (bs->file) {
4163 return bdrv_writev_vmstate(bs->file, qiov, pos);
4164 }
4165
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004166 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004167}
4168
Christoph Hellwig45566e92009-07-10 23:11:57 +02004169int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4170 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004171{
4172 BlockDriver *drv = bs->drv;
4173 if (!drv)
4174 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004175 if (drv->bdrv_load_vmstate)
4176 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4177 if (bs->file)
4178 return bdrv_load_vmstate(bs->file, buf, pos, size);
4179 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004180}
4181
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004182void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4183{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004184 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004185 return;
4186 }
4187
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004188 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004189}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004190
Kevin Wolf41c695c2012-12-06 14:32:58 +01004191int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4192 const char *tag)
4193{
4194 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4195 bs = bs->file;
4196 }
4197
4198 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4199 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4200 }
4201
4202 return -ENOTSUP;
4203}
4204
Fam Zheng4cc70e92013-11-20 10:01:54 +08004205int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4206{
4207 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4208 bs = bs->file;
4209 }
4210
4211 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4212 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4213 }
4214
4215 return -ENOTSUP;
4216}
4217
Kevin Wolf41c695c2012-12-06 14:32:58 +01004218int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4219{
Max Reitz938789e2014-03-10 23:44:08 +01004220 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004221 bs = bs->file;
4222 }
4223
4224 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4225 return bs->drv->bdrv_debug_resume(bs, tag);
4226 }
4227
4228 return -ENOTSUP;
4229}
4230
4231bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4232{
4233 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4234 bs = bs->file;
4235 }
4236
4237 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4238 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4239 }
4240
4241 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004242}
4243
Blue Swirl199630b2010-07-25 20:49:34 +00004244int bdrv_is_snapshot(BlockDriverState *bs)
4245{
4246 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4247}
4248
/* backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain. */
/* Walk the backing chain of @bs and return the first backing_hd whose
 * filename matches @backing_file (by literal comparison for protocol
 * paths, by canonicalized absolute path otherwise), or NULL. */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
                                          const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    /* Scratch buffers for path canonicalization; freed before return. */
    filename_full = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    return retval;
}
4314
Benoît Canetf198fd12012-08-02 10:22:47 +02004315int bdrv_get_backing_file_depth(BlockDriverState *bs)
4316{
4317 if (!bs->drv) {
4318 return 0;
4319 }
4320
4321 if (!bs->backing_hd) {
4322 return 0;
4323 }
4324
4325 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4326}
4327
bellard83f64092006-08-01 16:21:11 +00004328/**************************************************************/
4329/* async I/Os */
4330
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004331BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4332 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004333 BlockCompletionFunc *cb, void *opaque)
aliguori3b69e4b2009-01-22 16:59:24 +00004334{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004335 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4336
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004337 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004338 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004339}
4340
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004341BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4342 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004343 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004344{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004345 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4346
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004347 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004348 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004349}
4350
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004351BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004352 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004353 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004354{
4355 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4356
4357 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4358 BDRV_REQ_ZERO_WRITE | flags,
4359 cb, opaque, true);
4360}
4361
Kevin Wolf40b4f532009-09-09 17:53:37 +02004362
/* Tracks a batch of merged write requests submitted via multiwrite.
 * Uses a C99 flexible array member for the per-request callbacks. */
typedef struct MultiwriteCB {
    int error;                      /* first error seen among the requests */
    int num_requests;               /* outstanding merged requests */
    int num_callbacks;              /* entries in callbacks[] */
    struct {
        BlockCompletionFunc *cb;    /* caller's completion callback */
        void *opaque;               /* caller's opaque argument */
        QEMUIOVector *free_qiov;    /* merged qiov to destroy+free, or NULL */
    } callbacks[];
} MultiwriteCB;
4373
4374static void multiwrite_user_cb(MultiwriteCB *mcb)
4375{
4376 int i;
4377
4378 for (i = 0; i < mcb->num_callbacks; i++) {
4379 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004380 if (mcb->callbacks[i].free_qiov) {
4381 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4382 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004383 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004384 }
4385}
4386
4387static void multiwrite_cb(void *opaque, int ret)
4388{
4389 MultiwriteCB *mcb = opaque;
4390
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004391 trace_multiwrite_cb(mcb, ret);
4392
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004393 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004394 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004395 }
4396
4397 mcb->num_requests--;
4398 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004399 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004400 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004401 }
4402}
4403
4404static int multiwrite_req_compare(const void *a, const void *b)
4405{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004406 const BlockRequest *req1 = a, *req2 = b;
4407
4408 /*
4409 * Note that we can't simply subtract req2->sector from req1->sector
4410 * here as that could overflow the return value.
4411 */
4412 if (req1->sector > req2->sector) {
4413 return 1;
4414 } else if (req1->sector < req2->sector) {
4415 return -1;
4416 } else {
4417 return 0;
4418 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004419}
4420
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 *
 * Adjacent or overlapping requests (after sorting by start sector) are
 * combined into a single request with a freshly allocated QEMUIOVector;
 * the new qiov is recorded in mcb->callbacks[i].free_qiov so that
 * multiwrite_user_cb() can free it after completion.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // Merging would exceed the iovec limit of a single request; keep the
        // requests separate (the +1 accounts for a possible tail segment).
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        // Respect the driver's maximum transfer length, if one is set.
        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);

            // We shouldn't need to add any zeros between the two requests:
            // the merge condition above guarantees they touch or overlap.
            assert (reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);

            // Add tail of first request, if necessary
            if (qiov->size < reqs[outidx].qiov->size) {
                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
                                  reqs[outidx].qiov->size - qiov->size);
            }

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Record the merged qiov so it is freed after completion.
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            // Not mergeable: keep as a separate request slot.
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
4491
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure (flexible array of per-request callbacks)
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    // Remember each caller's callback; they are invoked from
    // multiwrite_user_cb() once the whole batch has completed.
    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. num_requests is set before the loop so that an
     * early completion cannot see a partially-initialized count. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
                              reqs[i].nb_sectors, reqs[i].flags,
                              multiwrite_cb, mcb,
                              true);
    }

    return 0;
}
4549
/* Synchronously cancel an AIO request: request asynchronous cancellation,
 * then poll the request's AioContext until the request has completed (i.e.
 * our temporary reference is the only one left). */
void bdrv_aio_cancel(BlockAIOCB *acb)
{
    /* Hold a reference so the ACB stays valid while we poll. */
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            /* No AioContext to poll - we cannot make progress. */
            abort();
        }
    }
    qemu_aio_unref(acb);
}
4565
4566/* Async version of aio cancel. The caller is not blocked if the acb implements
4567 * cancel_async, otherwise we do nothing and let the request normally complete.
4568 * In either case the completion callback must be called. */
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004569void bdrv_aio_cancel_async(BlockAIOCB *acb)
Fam Zheng02c50ef2014-09-11 13:41:09 +08004570{
4571 if (acb->aiocb_info->cancel_async) {
4572 acb->aiocb_info->cancel_async(acb);
4573 }
bellard83f64092006-08-01 16:21:11 +00004574}
4575
4576/**************************************************************/
4577/* async block device emulation */
4578
/* State for emulating AIO on top of a driver's synchronous bdrv_read/
 * bdrv_write, using a linear bounce buffer and a bottom half. */
typedef struct BlockAIOCBSync {
    BlockAIOCB common;
    QEMUBH *bh;            /* bottom half that delivers the completion */
    int ret;               /* result reported to the caller's callback */
    /* vector translation state */
    QEMUIOVector *qiov;    /* the caller's scatter/gather list */
    uint8_t *bounce;       /* linear bounce buffer; NULL on allocation failure */
    int is_write;          /* non-zero for write requests */
} BlockAIOCBSync;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004588
/* AIOCB descriptor for the synchronous emulation; only the allocation size
 * is needed (no cancel_async / get_aio_context hooks). */
static const AIOCBInfo bdrv_em_aiocb_info = {
    .aiocb_size = sizeof(BlockAIOCBSync),
};
4592
/* Bottom half that completes a BlockAIOCBSync request: copies read data back
 * into the caller's qiov, frees the bounce buffer and invokes the caller's
 * completion callback. */
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockAIOCBSync *acb = opaque;

    /* Successful reads must be copied from the bounce buffer back into the
     * caller's scatter/gather list before completion is reported. */
    if (!acb->is_write && acb->ret >= 0) {
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_unref(acb);
}
bellardbeac80c2006-06-26 20:08:57 +00004606
/* Emulate asynchronous vectored I/O on top of the driver's synchronous
 * bdrv_read/bdrv_write by linearizing the qiov into a bounce buffer.  The
 * I/O itself happens synchronously here; only the completion callback is
 * deferred to a bottom half (bdrv_aio_bh_cb). */
static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                      int64_t sector_num,
                                      QEMUIOVector *qiov,
                                      int nb_sectors,
                                      BlockCompletionFunc *cb,
                                      void *opaque,
                                      int is_write)

{
    BlockAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* May fail; the NULL case is handled below by reporting -ENOMEM. */
    acb->bounce = qemu_try_blockalign(bs, qiov->size);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);

    if (acb->bounce == NULL) {
        acb->ret = -ENOMEM;
    } else if (is_write) {
        /* Linearize the caller's buffers, then write synchronously. */
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        /* Read into the bounce buffer; copied back in bdrv_aio_bh_cb. */
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    /* Deliver the completion from a bottom half, as AIO callers expect. */
    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
4637
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004638static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004639 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004640 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004641{
aliguorif141eaf2009-04-07 18:43:24 +00004642 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004643}
4644
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004645static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004646 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004647 BlockCompletionFunc *cb, void *opaque)
aliguorif141eaf2009-04-07 18:43:24 +00004648{
4649 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4650}
4651
Kevin Wolf68485422011-06-30 10:05:46 +02004652
/* State for AIO requests implemented on top of a coroutine. */
typedef struct BlockAIOCBCoroutine {
    BlockAIOCB common;
    BlockRequest req;   /* request parameters and result (req.error) */
    bool is_write;      /* selects read vs. write in bdrv_co_do_rw() */
    bool *done;         /* NOTE(review): not referenced in this file's visible
                         * code - presumably set by external users; confirm
                         * before removing */
    QEMUBH* bh;         /* bottom half delivering the completion */
} BlockAIOCBCoroutine;
Kevin Wolf68485422011-06-30 10:05:46 +02004660
/* AIOCB descriptor for coroutine-based AIO emulation; size only. */
static const AIOCBInfo bdrv_em_co_aiocb_info = {
    .aiocb_size = sizeof(BlockAIOCBCoroutine),
};
4664
/* Bottom half that delivers the completion of a coroutine-backed AIO
 * request and releases the ACB. */
static void bdrv_co_em_bh(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);

    qemu_bh_delete(acb->bh);
    qemu_aio_unref(acb);
}
4674
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    /* Perform the request synchronously within the coroutine; the result is
     * stored in acb->req.error for the completion bottom half. */
    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
    }

    /* Report completion from a bottom half, as AIO callers expect. */
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
4692
/* Implement an AIO read/write request by spawning a coroutine running
 * bdrv_co_do_rw().  Returns the ACB; the caller's cb is invoked later from
 * a bottom half. */
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BdrvRequestFlags flags,
                                         BlockCompletionFunc *cb,
                                         void *opaque,
                                         bool is_write)
{
    Coroutine *co;
    BlockAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->req.flags = flags;
    acb->is_write = is_write;

    /* The coroutine may yield (e.g. on I/O) and complete later. */
    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
4717
/* Coroutine entry point for bdrv_aio_flush(): run the flush and schedule
 * the completion bottom half. */
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
4727
/* Asynchronous flush: wraps bdrv_co_flush() in a coroutine and reports the
 * result through cb/opaque. */
BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
                           BlockCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_flush(bs, opaque);

    Coroutine *co;
    BlockAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);

    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
4743
/* Coroutine entry point for bdrv_aio_discard(): run the discard and schedule
 * the completion bottom half. */
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
4753
/* Asynchronous discard: wraps bdrv_co_discard() in a coroutine and reports
 * the result through cb/opaque. */
BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
                             int64_t sector_num, int nb_sectors,
                             BlockCompletionFunc *cb, void *opaque)
{
    Coroutine *co;
    BlockAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
4771
/* Run all registered block-driver module init functions. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
pbrookce1a14d2006-08-07 02:38:06 +00004776
/* Like bdrv_init(), but restricts usable drivers to the configured
 * whitelist (checked elsewhere via use_bdrv_whitelist). */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
4782
/* Allocate and initialize an AIOCB of the size given by aiocb_info.
 * The returned ACB starts with a reference count of 1; release it with
 * qemu_aio_unref(). */
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                   BlockCompletionFunc *cb, void *opaque)
{
    BlockAIOCB *acb;

    acb = g_slice_alloc(aiocb_info->aiocb_size);
    acb->aiocb_info = aiocb_info;
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    acb->refcnt = 1;
    return acb;
}
4796
Fam Zhengf197fe22014-09-11 13:41:08 +08004797void qemu_aio_ref(void *p)
4798{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004799 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004800 acb->refcnt++;
4801}
4802
Fam Zheng80074292014-09-11 13:41:28 +08004803void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00004804{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004805 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004806 assert(acb->refcnt > 0);
4807 if (--acb->refcnt == 0) {
4808 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4809 }
pbrookce1a14d2006-08-07 02:38:06 +00004810}
bellard19cb3732006-08-19 11:45:59 +00004811
4812/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004813/* Coroutine block device emulation */
4814
/* Rendezvous between an AIO completion callback and a waiting coroutine:
 * bdrv_co_io_em_complete() stores ret and re-enters the coroutine. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine blocked in qemu_coroutine_yield() */
    int ret;                /* result delivered by the AIO callback */
} CoroutineIOCompletion;
4819
/* AIO completion callback that records the result and re-enters the
 * coroutine waiting in bdrv_co_io_em(), bdrv_co_flush() or
 * bdrv_co_discard(). */
static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
4827
/* Emulate coroutine read/write on top of the driver's AIO interface: submit
 * the request with bdrv_co_io_em_complete() as callback and yield until it
 * re-enters us with the result. */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        /* The driver could not even submit the request. */
        return -EIO;
    }
    /* Sleep until bdrv_co_io_em_complete() re-enters this coroutine. */
    qemu_coroutine_yield();

    return co.ret;
}
4853
4854static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4855 int64_t sector_num, int nb_sectors,
4856 QEMUIOVector *iov)
4857{
4858 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4859}
4860
4861static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4862 int64_t sector_num, int nb_sectors,
4863 QEMUIOVector *iov)
4864{
4865 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4866}
4867
/* Coroutine entry point used by bdrv_flush(); stores the result in the
 * shared RwCo so the caller's polling loop can observe completion. */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
4874
/* Flush bs, honouring the cache mode: data is flushed to the OS first, then
 * forced to the disk unless BDRV_O_NO_FLUSH is set, and finally the
 * underlying protocol (bs->file) is flushed as well.  Returns 0 on success
 * or a negative errno. */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        /* Nothing to flush: no device, no medium, or never writable. */
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Fall back to the AIO interface: submit and yield until
         * bdrv_co_io_em_complete() re-enters us with the result. */
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bdrv_co_flush(bs->file);
}
4937
/* Invalidate any cached metadata after incoming migration, so that the
 * image is re-read from the (now authoritative) storage.  Only acts on
 * devices that still have BDRV_O_INCOMING set; clears that flag.  Errors
 * are reported through errp. */
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    Error *local_err = NULL;
    int ret;

    if (!bs->drv) {
        return;
    }

    /* Only devices awaiting migration handover need invalidation. */
    if (!(bs->open_flags & BDRV_O_INCOMING)) {
        return;
    }
    bs->open_flags &= ~BDRV_O_INCOMING;

    if (bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs, &local_err);
    } else if (bs->file) {
        /* No driver hook: recurse into the underlying protocol layer. */
        bdrv_invalidate_cache(bs->file, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /* The image size may have changed while we were not looking. */
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        return;
    }
}
4968
/* Invalidate caches of all registered BlockDriverStates (see
 * bdrv_invalidate_cache).  Stops and reports via errp on the first error. */
void bdrv_invalidate_cache_all(Error **errp)
{
    BlockDriverState *bs;
    Error *local_err = NULL;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        /* Each device must be manipulated under its own AioContext lock. */
        aio_context_acquire(aio_context);
        bdrv_invalidate_cache(bs, &local_err);
        aio_context_release(aio_context);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}
4986
/* Synchronous flush wrapper around bdrv_co_flush().  May be called either
 * from coroutine context (runs inline) or outside (spawns a coroutine and
 * polls the AioContext until it finishes). */
int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* Drive the event loop until the coroutine stores its result. */
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }

    return rwco.ret;
}
5010
/* Parameter/result bundle passed to bdrv_discard_co_entry(). */
typedef struct DiscardCo {
    BlockDriverState *bs;
    int64_t sector_num;   /* first sector to discard */
    int nb_sectors;       /* number of sectors to discard */
    int ret;              /* NOT_DONE until the coroutine finishes */
} DiscardCo;
/* Coroutine entry point used by bdrv_discard(); stores the result in the
 * shared DiscardCo so the caller's polling loop can observe completion. */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    DiscardCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
5023
/* If no limit is specified in the BlockLimits (bs->bl.max_discard), use a
 * default of 32768 512-byte sectors (16 MiB) per discard request.
 */
#define MAX_DISCARD_DEFAULT 32768
5028
Paolo Bonzini4265d622011-10-17 12:32:14 +02005029int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5030 int nb_sectors)
5031{
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005032 int max_discard;
5033
Paolo Bonzini4265d622011-10-17 12:32:14 +02005034 if (!bs->drv) {
5035 return -ENOMEDIUM;
5036 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
5037 return -EIO;
5038 } else if (bs->read_only) {
5039 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005040 }
5041
Fam Zhenge4654d22013-11-13 18:29:43 +08005042 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005043
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005044 /* Do nothing if disabled. */
5045 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5046 return 0;
5047 }
5048
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005049 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005050 return 0;
5051 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005052
5053 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
5054 while (nb_sectors > 0) {
5055 int ret;
5056 int num = nb_sectors;
5057
5058 /* align request */
5059 if (bs->bl.discard_alignment &&
5060 num >= bs->bl.discard_alignment &&
5061 sector_num % bs->bl.discard_alignment) {
5062 if (num > bs->bl.discard_alignment) {
5063 num = bs->bl.discard_alignment;
5064 }
5065 num -= sector_num % bs->bl.discard_alignment;
5066 }
5067
5068 /* limit request size */
5069 if (num > max_discard) {
5070 num = max_discard;
5071 }
5072
5073 if (bs->drv->bdrv_co_discard) {
5074 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5075 } else {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005076 BlockAIOCB *acb;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005077 CoroutineIOCompletion co = {
5078 .coroutine = qemu_coroutine_self(),
5079 };
5080
5081 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5082 bdrv_co_io_em_complete, &co);
5083 if (acb == NULL) {
5084 return -EIO;
5085 } else {
5086 qemu_coroutine_yield();
5087 ret = co.ret;
5088 }
5089 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005090 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005091 return ret;
5092 }
5093
5094 sector_num += num;
5095 nb_sectors -= num;
5096 }
5097 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005098}
5099
/* Synchronous discard wrapper around bdrv_co_discard().  Runs inline when
 * already in coroutine context, otherwise spawns a coroutine and polls the
 * device's AioContext until it completes. */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    DiscardCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* Drive the event loop until the coroutine stores its result. */
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }

    return rwco.ret;
}
5125
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005126/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005127/* removable device support */
5128
5129/**
5130 * Return TRUE if the media is present
5131 */
5132int bdrv_is_inserted(BlockDriverState *bs)
5133{
5134 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005135
bellard19cb3732006-08-19 11:45:59 +00005136 if (!drv)
5137 return 0;
5138 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005139 return 1;
5140 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005141}
5142
5143/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005144 * Return whether the media changed since the last call to this
5145 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005146 */
/*
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if the driver does not implement the hook
 * (most drivers don't).
 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
    }
    return -ENOTSUP;
}
5156
5157/**
5158 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5159 */
/*
 * If @eject_flag is true, eject the media. Otherwise, close the tray.
 *
 * Forwards the request to the driver (if it implements the hook) and,
 * for devices that have a name, emits the DEVICE_TRAY_MOVED QMP event.
 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriver *drv = bs->drv;
    const char *device_name;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }

    /* Anonymous (empty-named) BDSes are internal; no event for those. */
    device_name = bdrv_get_device_name(bs);
    if (device_name[0] != '\0') {
        qapi_event_send_device_tray_moved(device_name,
                                          eject_flag, &error_abort);
    }
}
5175
bellard19cb3732006-08-19 11:45:59 +00005176/**
5177 * Lock or unlock the media (if it is locked, the user won't be able
5178 * to eject it manually).
5179 */
/*
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).  A driver without the hook silently ignores
 * the request.
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}
ths985a03b2007-12-24 16:10:43 +00005190
5191/* needed for generic scsi interface */
5192
5193int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5194{
5195 BlockDriver *drv = bs->drv;
5196
5197 if (drv && drv->bdrv_ioctl)
5198 return drv->bdrv_ioctl(bs, req, buf);
5199 return -ENOTSUP;
5200}
aliguori7d780662009-03-12 19:57:08 +00005201
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005202BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
aliguori221f7152009-03-28 17:28:41 +00005203 unsigned long int req, void *buf,
Markus Armbruster097310b2014-10-07 13:59:15 +02005204 BlockCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005205{
aliguori221f7152009-03-28 17:28:41 +00005206 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005207
aliguori221f7152009-03-28 17:28:41 +00005208 if (drv && drv->bdrv_aio_ioctl)
5209 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5210 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005211}
aliguorie268ca52009-04-22 20:20:00 +00005212
/*
 * Record the guest-visible block size for @bs.  Stored only; consumers
 * of bs->guest_block_size are elsewhere in the tree.
 */
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
    bs->guest_block_size = align;
}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005217
/*
 * Allocate @size bytes aligned for I/O on @bs (alignment taken from
 * bdrv_opt_mem_align).  Aborts on allocation failure, like
 * qemu_memalign; use qemu_try_blockalign for a NULL-returning variant.
 */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005222
Max Reitz9ebd8442014-10-22 14:09:27 +02005223void *qemu_blockalign0(BlockDriverState *bs, size_t size)
5224{
5225 return memset(qemu_blockalign(bs, size), 0, size);
5226}
5227
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005228void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5229{
5230 size_t align = bdrv_opt_mem_align(bs);
5231
5232 /* Ensure that NULL is never returned on success */
5233 assert(align > 0);
5234 if (size == 0) {
5235 size = align;
5236 }
5237
5238 return qemu_try_memalign(align, size);
5239}
5240
Max Reitz9ebd8442014-10-22 14:09:27 +02005241void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
5242{
5243 void *mem = qemu_try_blockalign(bs, size);
5244
5245 if (mem) {
5246 memset(mem, 0, size);
5247 }
5248
5249 return mem;
5250}
5251
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005252/*
5253 * Check if all memory in this vector is sector aligned.
5254 */
5255bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5256{
5257 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005258 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005259
5260 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005261 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005262 return false;
5263 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005264 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005265 return false;
5266 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005267 }
5268
5269 return true;
5270}
5271
/*
 * Create a new dirty bitmap tracking writes to @bs.
 *
 * @granularity is in bytes, must be a power of two, and at least one
 * sector (it is shifted down by BDRV_SECTOR_BITS and asserted nonzero).
 * On success the bitmap is linked into bs->dirty_bitmaps and returned;
 * on failure (device length unavailable) NULL is returned and @errp set.
 */
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
                                          Error **errp)
{
    int64_t bitmap_size;
    BdrvDirtyBitmap *bitmap;

    /* power-of-two check */
    assert((granularity & (granularity - 1)) == 0);

    /* Convert byte granularity to sectors; must not round down to 0. */
    granularity >>= BDRV_SECTOR_BITS;
    assert(granularity);
    bitmap_size = bdrv_nb_sectors(bs);
    if (bitmap_size < 0) {
        error_setg_errno(errp, -bitmap_size, "could not get length of device");
        /* NOTE(review): errno is set here in addition to errp — looks like
         * callers may inspect errno on the NULL return; confirm before
         * removing. */
        errno = -bitmap_size;
        return NULL;
    }
    bitmap = g_new0(BdrvDirtyBitmap, 1);
    /* ffs(granularity) - 1 == log2(granularity), valid because
     * granularity is a nonzero power of two. */
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
    return bitmap;
}
5293
5294void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5295{
5296 BdrvDirtyBitmap *bm, *next;
5297 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5298 if (bm == bitmap) {
5299 QLIST_REMOVE(bitmap, list);
5300 hbitmap_free(bitmap->bitmap);
5301 g_free(bitmap);
5302 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005303 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005304 }
5305}
5306
/*
 * Build a QAPI list describing every dirty bitmap attached to @bs:
 * dirty-sector count and granularity in bytes per bitmap.
 * The caller takes ownership of the returned list.
 */
BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
{
    BdrvDirtyBitmap *bm;
    BlockDirtyInfoList *list = NULL;
    /* tail pointer: entries are appended in iteration order */
    BlockDirtyInfoList **plist = &list;

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
        info->count = bdrv_get_dirty_count(bs, bm);
        /* hbitmap granularity is in sectors (log2); convert to bytes */
        info->granularity =
            ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
        entry->value = info;
        *plist = entry;
        plist = &entry->next;
    }

    return list;
}
5326
Fam Zhenge4654d22013-11-13 18:29:43 +08005327int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005328{
Fam Zhenge4654d22013-11-13 18:29:43 +08005329 if (bitmap) {
5330 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005331 } else {
5332 return 0;
5333 }
5334}
5335
/*
 * Initialize @hbi to iterate over @bitmap starting from sector 0.
 * (@bs is unused; kept for interface symmetry with the other
 * dirty-bitmap helpers.)
 */
void bdrv_dirty_iter_init(BlockDriverState *bs,
                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
5341
/*
 * Mark [cur_sector, cur_sector + nr_sectors) dirty in every dirty
 * bitmap attached to @bs.
 */
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                    int nr_sectors)
{
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
    }
}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005350
/*
 * Clear [cur_sector, cur_sector + nr_sectors) in every dirty bitmap
 * attached to @bs.
 */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
{
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
    }
}
5358
/*
 * Return the number of dirty sectors recorded in @bitmap.
 * (@bs is unused; kept for interface symmetry.)
 */
int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
5363
/* Get a reference to bs.  Not thread-safe: callers serialize elsewhere. */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
5369
5370/* Release a previously grabbed reference to bs.
5371 * If after releasing, reference count is zero, the BlockDriverState is
5372 * deleted. */
5373void bdrv_unref(BlockDriverState *bs)
5374{
Jeff Cody9a4d5ca2014-07-23 17:22:57 -04005375 if (!bs) {
5376 return;
5377 }
Fam Zheng9fcb0252013-08-23 09:14:46 +08005378 assert(bs->refcnt > 0);
5379 if (--bs->refcnt == 0) {
5380 bdrv_delete(bs);
5381 }
5382}
5383
/* One reason a particular operation category is blocked on a BDS;
 * kept on the per-category bs->op_blockers[] list. */
struct BdrvOpBlocker {
    Error *reason;                    /* human-readable blocking reason */
    QLIST_ENTRY(BdrvOpBlocker) list;  /* link in bs->op_blockers[op] */
};
5388
/*
 * Return true if operation category @op is currently blocked on @bs.
 * When blocked and @errp is non-NULL, an error naming the device and
 * the first blocker's reason is reported.
 */
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
        blocker = QLIST_FIRST(&bs->op_blockers[op]);
        if (errp) {
            error_setg(errp, "Device '%s' is busy: %s",
                       bdrv_get_device_name(bs),
                       error_get_pretty(blocker->reason));
        }
        return true;
    }
    return false;
}
5404
/*
 * Block operation category @op on @bs for @reason.
 * @reason is borrowed, not copied; it must outlive the block and is
 * the key used by bdrv_op_unblock to remove this blocker.
 */
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    blocker = g_new0(BdrvOpBlocker, 1);
    blocker->reason = reason;
    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
}
5414
/*
 * Remove every blocker on @op whose reason pointer equals @reason
 * (pointer identity, not message comparison).
 */
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker, *next;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
        if (blocker->reason == reason) {
            QLIST_REMOVE(blocker, list);
            g_free(blocker);
        }
    }
}
5426
5427void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5428{
5429 int i;
5430 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5431 bdrv_op_block(bs, i, reason);
5432 }
5433}
5434
5435void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5436{
5437 int i;
5438 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5439 bdrv_op_unblock(bs, i, reason);
5440 }
5441}
5442
5443bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5444{
5445 int i;
5446
5447 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5448 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5449 return false;
5450 }
5451 }
5452 return true;
5453}
5454
/* Enable I/O status tracking on @bs and reset the status to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
5460
/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors
 * (enospc or stop policy on write, stop policy on read). */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}
5470
/* Disable I/O status tracking on @bs (the last status value is kept). */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
5475
/*
 * Reset the I/O status of @bs to OK (and of its block job, if any).
 * No-op when I/O status tracking is not effectively enabled.
 */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}
5485
/*
 * Latch an error into the I/O status of @bs: ENOSPC maps to NOSPACE,
 * anything else to FAILED.  Only the first error is recorded; later
 * calls while the status is non-OK are ignored.
 */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    assert(bdrv_iostatus_is_enabled(bs));
    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
5494
/*
 * Create a new disk image.
 *
 * @filename:      destination image (protocol prefix allowed)
 * @fmt:           format driver name (e.g. "qcow2"); must be known
 * @base_filename: optional backing file (overridable via @options)
 * @base_fmt:      optional backing file format
 * @options:       "-o"-style option string parsed into the create opts
 * @img_size:      default image size; a backing file's length is used
 *                 when no explicit size option is given
 * @flags:         BDRV_O_* flags used when probing the backing file
 * @errp:          error out-parameter; unset on success
 * @quiet:         suppress the "Formatting ..." console message
 *
 * Returns nothing; success/failure is reported solely through @errp.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true);
    if (!proto_drv) {
        error_setg(errp, "Unknown protocol '%s'", filename);
        return;
    }

    /* Creation options are the union of format-driver and
     * protocol-driver options. */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
    if (options) {
        if (qemu_opts_do_parse(opts, options, NULL) != 0) {
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    /* Explicit base file/format arguments only work if the format
     * driver actually supports backing files. */
    if (base_filename) {
        if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt) {
        backing_drv = bdrv_find_format(backing_fmt);
        if (!backing_drv) {
            error_setg(errp, "Unknown backing file format '%s'",
                       backing_fmt);
            goto out;
        }
    }

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            /* NOTE(review): this inner 'size' shadows the outer one
             * (-Wshadow); harmless here since the outer value is the
             * -1 sentinel, but worth renaming. */
            int64_t size;
            int back_flags;

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            bs = NULL;
            ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
                            backing_drv, &local_err);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Could not open '%s': %s",
                                 backing_file,
                                 error_get_pretty(local_err));
                error_free(local_err);
                local_err = NULL;
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            /* Inherit the backing file's length as the image size. */
            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts);
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        error_free(local_err);
        local_err = NULL;
    }

out:
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005642
/* Return the AioContext this BDS is currently attached to. */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
5647
/*
 * Detach @bs (and recursively its file and backing BDSes) from its
 * current AioContext.  Notifiers fire first, then throttling and the
 * driver hook, then children; finally bs->aio_context is cleared.
 * No-op when no driver is attached.
 */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf;

    if (!bs->drv) {
        return;
    }

    /* Let interested parties (e.g. dataplane) release per-context state
     * before the context goes away. */
    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
        baf->detach_aio_context(baf->opaque);
    }

    if (bs->io_limits_enabled) {
        throttle_detach_aio_context(&bs->throttle_state);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file);
    }
    if (bs->backing_hd) {
        bdrv_detach_aio_context(bs->backing_hd);
    }

    bs->aio_context = NULL;
}
5675
/*
 * Attach @bs to @new_context, mirroring bdrv_detach_aio_context in
 * reverse order: children first, then the driver hook and throttling,
 * and the notifiers last.  No-op when no driver is attached.
 */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *ban;

    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing_hd) {
        bdrv_attach_aio_context(bs->backing_hd, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    if (bs->io_limits_enabled) {
        throttle_attach_aio_context(&bs->throttle_state, new_context);
    }

    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
        ban->attached_aio_context(new_context, ban->opaque);
    }
}
5704
/*
 * Move @bs to @new_context.  All in-flight requests are drained first,
 * then the BDS is detached and re-attached under the new context's
 * lock.  Must be called from the BDS's current ("old") AioContext.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005718
/*
 * Register a pair of callbacks invoked when @bs changes AioContext:
 * @detach_aio_context before the old context is left, and
 * @attached_aio_context after the new one is entered.
 * The notifier is identified later (for removal) by the exact triple
 * (attached_aio_context, detach_aio_context, opaque).
 */
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque)
{
    BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
    *ban = (BdrvAioNotifier){
        .attached_aio_context = attached_aio_context,
        .detach_aio_context   = detach_aio_context,
        .opaque               = opaque
    };

    QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
}
5732
/*
 * Remove a notifier previously registered with
 * bdrv_add_aio_context_notifier, matched by the exact triple of
 * callbacks and opaque pointer.  Only the first match is removed.
 * Aborts if no such notifier is registered — removal of an
 * unregistered notifier is a programming error.
 */
void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
                                      void (*attached_aio_context)(AioContext *,
                                                                   void *),
                                      void (*detach_aio_context)(void *),
                                      void *opaque)
{
    BdrvAioNotifier *ban, *ban_next;

    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        if (ban->attached_aio_context == attached_aio_context &&
            ban->detach_aio_context   == detach_aio_context   &&
            ban->opaque               == opaque)
        {
            QLIST_REMOVE(ban, list);
            g_free(ban);

            return;
        }
    }

    abort();
}
5755
/* Register @notifier to be invoked before each write request on @bs. */
void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
{
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}
Max Reitz6f176b42013-09-03 10:09:50 +02005761
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005762int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
Max Reitz6f176b42013-09-03 10:09:50 +02005763{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005764 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02005765 return -ENOTSUP;
5766 }
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005767 return bs->drv->bdrv_amend_options(bs, opts);
Max Reitz6f176b42013-09-03 10:09:50 +02005768}
Benoît Canetf6186f42013-10-02 14:33:48 +02005769
/* This function will be called by the bdrv_recurse_is_first_non_filter method
 * of block filter and by bdrv_is_first_non_filter.
 * It is used to test if the given bs is the candidate or recurse more in the
 * node graph.
 *
 * Returns true iff @candidate is reachable from @bs through filter
 * drivers only (and is itself the first non-filter encountered).
 */
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
                                      BlockDriverState *candidate)
{
    /* return false if basic checks fails */
    if (!bs || !bs->drv) {
        return false;
    }

    /* the code reached a non block filter driver -> check if the bs is
     * the same as the candidate. It's the recursion termination condition.
     */
    if (!bs->drv->is_filter) {
        return bs == candidate;
    }
    /* Down this path the driver is a block filter driver */

    /* If the block filter recursion method is defined use it to recurse down
     * the node graph.
     */
    if (bs->drv->bdrv_recurse_is_first_non_filter) {
        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
    }

    /* the driver is a block filter but don't allow to recurse -> return false
     */
    return false;
}
5802
/* This function checks if the candidate is the first non filter bs down it's
 * bs chain. Since we don't have pointers to parents it explore all bs chains
 * from the top. Some filters can choose not to pass down the recursion.
 */
bool bdrv_is_first_non_filter(BlockDriverState *candidate)
{
    BlockDriverState *bs;

    /* walk down the bs forest recursively */
    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        bool perm;

        /* try to recurse in this top level bs */
        perm = bdrv_recurse_is_first_non_filter(bs, candidate);

        /* candidate is the first non filter */
        if (perm) {
            return true;
        }
    }

    return false;
}
Benoît Canet09158f02014-06-27 18:25:25 +02005826
/*
 * Look up the node @node_name and validate that it may be replaced:
 * it must exist, must not have a REPLACE op blocker, and must be the
 * top-most non-filter of its chain.  Returns the node on success,
 * NULL (with @errp set) otherwise.
 */
BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
{
    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
    if (!to_replace_bs) {
        error_setg(errp, "Node name '%s' not found", node_name);
        return NULL;
    }

    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
        return NULL;
    }

    /* We don't want arbitrary node of the BDS chain to be replaced only the top
     * most non filter in order to prevent data corruption.
     * Another benefit is that this tests exclude backing files which are
     * blocked by the backing blockers.
     */
    if (!bdrv_is_first_non_filter(to_replace_bs)) {
        error_setg(errp, "Only top most non filter can be replaced");
        return NULL;
    }

    return to_replace_bs;
}
Ming Lei448ad912014-07-04 18:04:33 +08005851
5852void bdrv_io_plug(BlockDriverState *bs)
5853{
5854 BlockDriver *drv = bs->drv;
5855 if (drv && drv->bdrv_io_plug) {
5856 drv->bdrv_io_plug(bs);
5857 } else if (bs->file) {
5858 bdrv_io_plug(bs->file);
5859 }
5860}
5861
5862void bdrv_io_unplug(BlockDriverState *bs)
5863{
5864 BlockDriver *drv = bs->drv;
5865 if (drv && drv->bdrv_io_unplug) {
5866 drv->bdrv_io_unplug(bs);
5867 } else if (bs->file) {
5868 bdrv_io_unplug(bs->file);
5869 }
5870}
5871
5872void bdrv_flush_io_queue(BlockDriverState *bs)
5873{
5874 BlockDriver *drv = bs->drv;
5875 if (drv && drv->bdrv_flush_io_queue) {
5876 drv->bdrv_flush_io_queue(bs);
5877 } else if (bs->file) {
5878 bdrv_flush_io_queue(bs->file);
5879 }
5880}
Max Reitz91af7012014-07-18 20:24:56 +02005881
/*
 * Copy this BDS's own driver-specific open options from bs->options
 * into @d.  Options belonging to child nodes (key contains '.') and
 * the generic "node-name" key are skipped.  Each copied value gets an
 * extra reference, so @d owns its entries.
 *
 * Returns true if at least one option was copied.
 */
static bool append_open_options(QDict *d, BlockDriverState *bs)
{
    const QDictEntry *entry;
    bool found_any = false;

    for (entry = qdict_first(bs->options); entry;
         entry = qdict_next(bs->options, entry))
    {
        /* Only take options for this level and exclude all non-driver-specific
         * options */
        if (!strchr(qdict_entry_key(entry), '.') &&
            strcmp(qdict_entry_key(entry), "node-name"))
        {
            qobject_incref(qdict_entry_value(entry));
            qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
            found_any = true;
        }
    }

    return found_any;
}
5903
/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
 *
 * Recurses into bs->file first, because this node's filename generally
 * derives from its file's. Exactly one of the three strategy branches below
 * runs: driver hook, reconstruction from bs->file, or reuse of bs->options.
 */
void bdrv_refresh_filename(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    QDict *opts;

    /* Nothing sensible can be produced for a driverless BDS */
    if (!drv) {
        return;
    }

    /* This BDS's file name will most probably depend on its file's name, so
     * refresh that first */
    if (bs->file) {
        bdrv_refresh_filename(bs->file);
    }

    if (drv->bdrv_refresh_filename) {
        /* Obsolete information is of no use here, so drop the old file name
         * information before refreshing it */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        /* The driver hook is fully responsible for repopulating
         * exact_filename and full_open_options */
        drv->bdrv_refresh_filename(bs);
    } else if (bs->file) {
        /* Try to reconstruct valid information from the underlying file */
        bool has_open_options;

        /* Same invalidation as in the driver-hook branch above */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        /* opts is owned here until either stored in bs->full_open_options
         * or released in the else branch below */
        opts = qdict_new();
        has_open_options = append_open_options(opts, bs);

        /* If no specific options have been given for this BDS, the filename of
         * the underlying file should suffice for this one as well */
        if (bs->file->exact_filename[0] && !has_open_options) {
            strcpy(bs->exact_filename, bs->file->exact_filename);
        }
        /* Reconstructing the full options QDict is simple for most format block
         * drivers, as long as the full options are known for the underlying
         * file BDS. The full options QDict of that file BDS should somehow
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->full_open_options) {
            qdict_put_obj(opts, "driver",
                          QOBJECT(qstring_from_str(drv->format_name)));
            /* QINCREF: the "file" entry shares ownership of the child's
             * full_open_options with the child BDS itself */
            QINCREF(bs->file->full_open_options);
            qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));

            bs->full_open_options = opts;
        } else {
            /* Reconstruction failed; discard the partially-filled dict */
            QDECREF(opts);
        }
    } else if (!bs->full_open_options && qdict_size(bs->options)) {
        /* There is no underlying file BDS (at least referenced by BDS.file),
         * so the full options QDict should be equal to the options given
         * specifically for this block device when it was opened (plus the
         * driver specification).
         * Because those options don't change, there is no need to update
         * full_open_options when it's already set. */

        opts = qdict_new();
        append_open_options(opts, bs);
        qdict_put_obj(opts, "driver",
                      QOBJECT(qstring_from_str(drv->format_name)));

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
             * require this filename to be parsed), but we have to find some
             * default solution here, so just include it. If some block driver
             * does not support pure options without any filename at all or
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
            qdict_put_obj(opts, "filename",
                          QOBJECT(qstring_from_str(bs->exact_filename)));
        }

        /* Ownership of opts moves to the BDS */
        bs->full_open_options = opts;
    }

    /* Finally derive the user-visible filename: prefer the plain filename,
     * fall back to the "json:" pseudo-protocol encoding of the options */
    if (bs->exact_filename[0]) {
        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
    } else if (bs->full_open_options) {
        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
                 qstring_get_str(json));
        QDECREF(json);
    }
}
Benoît Canet5366d0c2014-09-05 15:46:18 +02006011
6012/* This accessor function purpose is to allow the device models to access the
6013 * BlockAcctStats structure embedded inside a BlockDriverState without being
6014 * aware of the BlockDriverState structure layout.
6015 * It will go away when the BlockAcctStats structure will be moved inside
6016 * the device models.
6017 */
6018BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6019{
6020 return &bs->stats;
6021}