blob: 4121929c4b592a064a653f65685fb91b2f910cea [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010027#include "block/block_int.h"
28#include "block/blockjob.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010029#include "qemu/module.h"
Paolo Bonzini7b1b5d12012-12-17 18:19:43 +010030#include "qapi/qmp/qjson.h"
Markus Armbrusterbfb197e2014-10-07 13:59:11 +020031#include "sysemu/block-backend.h"
Paolo Bonzini9c17d612012-12-17 18:20:04 +010032#include "sysemu/sysemu.h"
Fam Zhengde50a202015-03-25 15:27:26 +080033#include "sysemu/qtest.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010034#include "qemu/notify.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010035#include "block/coroutine.h"
Benoît Canetc13163f2014-01-23 21:31:34 +010036#include "block/qapi.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030037#include "qmp-commands.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010038#include "qemu/timer.h"
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +020039#include "qapi-event.h"
bellardfc01f7e2003-06-30 10:03:06 +000040
Juan Quintela71e72a12009-07-27 16:12:56 +020041#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000042#include <sys/types.h>
43#include <sys/stat.h>
44#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000045#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000046#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000047#include <sys/disk.h>
48#endif
blueswir1c5e97232009-03-07 20:06:23 +000049#endif
bellard7674e7b2005-04-26 21:59:26 +000050
aliguori49dc7682009-03-08 16:26:59 +000051#ifdef _WIN32
52#include <windows.h>
53#endif
54
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 * or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
    bool disabled;              /* Bitmap is read-only */
    QLIST_ENTRY(BdrvDirtyBitmap) list; /* Link in the owning BDS's dirty_bitmaps */
};
71
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010072#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
73
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020074static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000075 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020076 BlockCompletionFunc *cb, void *opaque);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020077static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000078 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020079 BlockCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020080static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
81 int64_t sector_num, int nb_sectors,
82 QEMUIOVector *iov);
83static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
84 int64_t sector_num, int nb_sectors,
85 QEMUIOVector *iov);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010086static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
87 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000088 BdrvRequestFlags flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010089static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
90 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000091 BdrvRequestFlags flags);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020092static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
93 int64_t sector_num,
94 QEMUIOVector *qiov,
95 int nb_sectors,
96 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +020097 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020098 void *opaque,
99 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +0100100static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +0100101static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +0200102 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellardec530c82006-04-25 22:36:06 +0000103
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100104static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
105 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +0000106
Benoît Canetdc364f42014-01-23 21:31:32 +0100107static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
108 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
109
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100110static QLIST_HEAD(, BlockDriver) bdrv_drivers =
111 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +0000112
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +0300113static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
114 int nr_sectors);
115static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
116 int nr_sectors);
Markus Armbrustereb852012009-10-27 18:41:44 +0100117/* If non-zero, use only whitelisted block drivers */
118static int use_bdrv_whitelist;
119
#ifdef _WIN32
/* True when @filename begins with a drive letter and ':', e.g. "c:". */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

/* True for a bare drive ("d:") or a device-namespace path ("\\.\...", "//./..."). */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
139
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800140/* throttling disk I/O limits */
Benoît Canetcc0681c2013-09-02 14:14:39 +0200141void bdrv_set_io_limits(BlockDriverState *bs,
142 ThrottleConfig *cfg)
143{
144 int i;
145
146 throttle_config(&bs->throttle_state, cfg);
147
148 for (i = 0; i < 2; i++) {
149 qemu_co_enter_next(&bs->throttled_reqs[i]);
150 }
151}
152
153/* this function drain all the throttled IOs */
154static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
155{
156 bool drained = false;
157 bool enabled = bs->io_limits_enabled;
158 int i;
159
160 bs->io_limits_enabled = false;
161
162 for (i = 0; i < 2; i++) {
163 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
164 drained = true;
165 }
166 }
167
168 bs->io_limits_enabled = enabled;
169
170 return drained;
171}
172
/* Tear down I/O throttling on @bs: restart all queued requests, then
 * destroy the throttle state. */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    /* Clear the flag first so drained requests are not throttled again. */
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}
181
Benoît Canetcc0681c2013-09-02 14:14:39 +0200182static void bdrv_throttle_read_timer_cb(void *opaque)
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800183{
184 BlockDriverState *bs = opaque;
Benoît Canetcc0681c2013-09-02 14:14:39 +0200185 qemu_co_enter_next(&bs->throttled_reqs[0]);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800186}
187
Benoît Canetcc0681c2013-09-02 14:14:39 +0200188static void bdrv_throttle_write_timer_cb(void *opaque)
189{
190 BlockDriverState *bs = opaque;
191 qemu_co_enter_next(&bs->throttled_reqs[1]);
192}
193
194/* should be called before bdrv_set_io_limits if a limit is set */
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800195void bdrv_io_limits_enable(BlockDriverState *bs)
196{
Fam Zhengde50a202015-03-25 15:27:26 +0800197 int clock_type = QEMU_CLOCK_REALTIME;
198
199 if (qtest_enabled()) {
200 /* For testing block IO throttling only */
201 clock_type = QEMU_CLOCK_VIRTUAL;
202 }
Benoît Canetcc0681c2013-09-02 14:14:39 +0200203 assert(!bs->io_limits_enabled);
204 throttle_init(&bs->throttle_state,
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +0200205 bdrv_get_aio_context(bs),
Fam Zhengde50a202015-03-25 15:27:26 +0800206 clock_type,
Benoît Canetcc0681c2013-09-02 14:14:39 +0200207 bdrv_throttle_read_timer_cb,
208 bdrv_throttle_write_timer_cb,
209 bs);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800210 bs->io_limits_enabled = true;
211}
212
/* Make an I/O request wait, if throttling requires it.
 *
 * @bytes: size of the I/O in bytes
 * @is_write: true for a write, false for a read (also selects the queue)
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* Check whether this I/O has to wait for the throttle timer */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* Queue the I/O if it must wait, or if earlier requests of the same
     * direction are already queued (preserves FIFO ordering). */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* The I/O will be executed now; charge it against the limits */
    throttle_account(&bs->throttle_state, is_write, bytes);


    /* If the next request must wait, leave it queued; the timer will
     * wake it up later */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* Otherwise let the next queued request run immediately */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}
243
Kevin Wolf339064d2013-11-28 10:23:32 +0100244size_t bdrv_opt_mem_align(BlockDriverState *bs)
245{
246 if (!bs || !bs->drv) {
247 /* 4k should be on the safe side */
248 return 4096;
249 }
250
251 return bs->bl.opt_mem_alignment;
252}
253
/* check if the path starts with "<protocol>:" */
int path_has_protocol(const char *path)
{
    const char *sep;

#ifdef _WIN32
    /* Drive letters ("c:", "c:\...") look like protocol prefixes but
     * are plain filenames. */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    sep = path + strcspn(path, ":/\\");
#else
    sep = path + strcspn(path, ":/");
#endif

    /* A protocol prefix is a ':' occurring before any path separator. */
    return *sep == ':';
}
271
/* Return 1 when @path is an absolute path name, 0 otherwise. */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return *path == '/' || *path == '\\';
#else
    return *path == '/';
#endif
}
284
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *last_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Skip a "protocol:" prefix of base_path, if present. */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    /* Locate the character just past the last path separator. */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!last_sep || backslash > last_sep) {
            last_sep = backslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;
    if (last_sep > prefix_end) {
        prefix_end = last_sep;
    }

    /* Copy base_path's directory part (truncated to fit dest), then
     * append the relative filename. */
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
328
Max Reitz0a828552014-11-26 17:20:25 +0100329void bdrv_get_full_backing_filename_from_filename(const char *backed,
330 const char *backing,
Max Reitz9f074292014-11-26 17:20:26 +0100331 char *dest, size_t sz,
332 Error **errp)
Max Reitz0a828552014-11-26 17:20:25 +0100333{
Max Reitz9f074292014-11-26 17:20:26 +0100334 if (backing[0] == '\0' || path_has_protocol(backing) ||
335 path_is_absolute(backing))
336 {
Max Reitz0a828552014-11-26 17:20:25 +0100337 pstrcpy(dest, sz, backing);
Max Reitz9f074292014-11-26 17:20:26 +0100338 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
339 error_setg(errp, "Cannot use relative backing file names for '%s'",
340 backed);
Max Reitz0a828552014-11-26 17:20:25 +0100341 } else {
342 path_combine(dest, sz, backed, backing);
343 }
344}
345
Max Reitz9f074292014-11-26 17:20:26 +0100346void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
347 Error **errp)
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200348{
Max Reitz9f074292014-11-26 17:20:26 +0100349 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
350
351 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
352 dest, sz, errp);
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200353}
354
/* Register @bdrv on the global driver list, installing emulation for any
 * missing coroutine/AIO entry points. */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellardb3380822004-03-14 21:38:54 +0000374
Markus Armbruster7f06d472014-10-07 13:59:12 +0200375BlockDriverState *bdrv_new_root(void)
bellardfc01f7e2003-06-30 10:03:06 +0000376{
Markus Armbruster7f06d472014-10-07 13:59:12 +0200377 BlockDriverState *bs = bdrv_new();
Markus Armbrustere4e99862014-10-07 13:59:03 +0200378
Markus Armbrustere4e99862014-10-07 13:59:03 +0200379 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
Markus Armbrustere4e99862014-10-07 13:59:03 +0200380 return bs;
381}
382
383BlockDriverState *bdrv_new(void)
384{
385 BlockDriverState *bs;
386 int i;
387
Markus Armbruster5839e532014-08-19 10:31:08 +0200388 bs = g_new0(BlockDriverState, 1);
Fam Zhenge4654d22013-11-13 18:29:43 +0800389 QLIST_INIT(&bs->dirty_bitmaps);
Fam Zhengfbe40ff2014-05-23 21:29:42 +0800390 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
391 QLIST_INIT(&bs->op_blockers[i]);
392 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300393 bdrv_iostatus_disable(bs);
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200394 notifier_list_init(&bs->close_notifiers);
Stefan Hajnoczid616b222013-06-24 17:13:10 +0200395 notifier_with_return_list_init(&bs->before_write_notifiers);
Benoît Canetcc0681c2013-09-02 14:14:39 +0200396 qemu_co_queue_init(&bs->throttled_reqs[0]);
397 qemu_co_queue_init(&bs->throttled_reqs[1]);
Fam Zheng9fcb0252013-08-23 09:14:46 +0800398 bs->refcnt = 1;
Stefan Hajnoczidcd04222014-05-08 16:34:37 +0200399 bs->aio_context = qemu_get_aio_context();
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200400
bellardb3380822004-03-14 21:38:54 +0000401 return bs;
402}
403
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200404void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
405{
406 notifier_list_add(&bs->close_notifiers, notify);
407}
408
bellardea2384d2004-08-01 21:59:26 +0000409BlockDriver *bdrv_find_format(const char *format_name)
410{
411 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100412 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
413 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000414 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100415 }
bellardea2384d2004-08-01 21:59:26 +0000416 }
417 return NULL;
418}
419
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800420static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100421{
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800422 static const char *whitelist_rw[] = {
423 CONFIG_BDRV_RW_WHITELIST
424 };
425 static const char *whitelist_ro[] = {
426 CONFIG_BDRV_RO_WHITELIST
Markus Armbrustereb852012009-10-27 18:41:44 +0100427 };
428 const char **p;
429
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800430 if (!whitelist_rw[0] && !whitelist_ro[0]) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100431 return 1; /* no whitelist, anything goes */
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800432 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100433
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800434 for (p = whitelist_rw; *p; p++) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100435 if (!strcmp(drv->format_name, *p)) {
436 return 1;
437 }
438 }
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800439 if (read_only) {
440 for (p = whitelist_ro; *p; p++) {
441 if (!strcmp(drv->format_name, *p)) {
442 return 1;
443 }
444 }
445 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100446 return 0;
447}
448
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800449BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
450 bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100451{
452 BlockDriver *drv = bdrv_find_format(format_name);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800453 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
Markus Armbrustereb852012009-10-27 18:41:44 +0100454}
455
/* State shared between bdrv_create() and its coroutine entry point. */
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;  /* copy allocated and freed by bdrv_create() */
    QemuOpts *opts;
    int ret;         /* NOT_DONE while the coroutine is still running */
    Error *err;
} CreateCo;
463
464static void coroutine_fn bdrv_create_co_entry(void *opaque)
465{
Max Reitzcc84d902013-09-06 17:14:26 +0200466 Error *local_err = NULL;
467 int ret;
468
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800469 CreateCo *cco = opaque;
470 assert(cco->drv);
471
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800472 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100473 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200474 error_propagate(&cco->err, local_err);
475 }
476 cco->ret = ret;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800477}
478
/* Create an image with @drv at @filename using creation options @opts.
 * Runs the driver callback in a coroutine; returns 0 on success or a
 * negative errno value, setting @errp on failure. */
int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        /* Poll until the coroutine overwrites the NOT_DONE sentinel. */
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            /* Driver reported failure without a detailed error. */
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}
523
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800524int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200525{
526 BlockDriver *drv;
Max Reitzcc84d902013-09-06 17:14:26 +0200527 Error *local_err = NULL;
528 int ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200529
Max Reitzb65a5e12015-02-05 13:58:12 -0500530 drv = bdrv_find_protocol(filename, true, errp);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200531 if (drv == NULL) {
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000532 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200533 }
534
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800535 ret = bdrv_create(drv, filename, opts, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100536 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200537 error_propagate(errp, local_err);
538 }
539 return ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200540}
541
/* Recompute @bs's I/O limits (bs->bl): start from the children's limits,
 * recursing into them first, then let the driver override the result. */
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    /* No driver attached: leave all limits zeroed */
    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        /* No protocol child: fall back to a 512-byte alignment default */
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        /* Merge with the backing file: take the stricter of each limit
         * (larger optimum/alignment, smaller non-zero maximum). */
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.max_transfer_length =
            MIN_NON_ZERO(bs->bl.max_transfer_length,
                         bs->backing_hd->bl.max_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}
589
Ekaterina Tumanova892b7de2015-02-16 12:47:54 +0100590/**
591 * Try to get @bs's logical and physical block size.
592 * On success, store them in @bsz struct and return 0.
593 * On failure return -errno.
594 * @bs must not be empty.
595 */
596int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
597{
598 BlockDriver *drv = bs->drv;
599
600 if (drv && drv->bdrv_probe_blocksizes) {
601 return drv->bdrv_probe_blocksizes(bs, bsz);
602 }
603
604 return -ENOTSUP;
605}
606
607/**
608 * Try to get @bs's geometry (cyls, heads, sectors).
609 * On success, store them in @geo struct and return 0.
610 * On failure return -errno.
611 * @bs must not be empty.
612 */
613int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
614{
615 BlockDriver *drv = bs->drv;
616
617 if (drv && drv->bdrv_probe_geometry) {
618 return drv->bdrv_probe_geometry(bs, geo);
619 }
620
621 return -ENOTSUP;
622}
623
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Save close()'s errno before unlink() can clobber it; the
         * previous code read errno after unlink() and could therefore
         * report the wrong error. */
        int saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
bellardea2384d2004-08-01 21:59:26 +0000659
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200660/*
661 * Detect host devices. By convention, /dev/cdrom[N] is always
662 * recognized as a host CDROM.
663 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200664static BlockDriver *find_hdev_driver(const char *filename)
665{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200666 int score_max = 0, score;
667 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200668
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100669 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200670 if (d->bdrv_probe_device) {
671 score = d->bdrv_probe_device(filename);
672 if (score > score_max) {
673 score_max = score;
674 drv = d;
675 }
676 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200677 }
678
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200679 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200680}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200681
/* Select the protocol driver for @filename.  Host devices win over any
 * protocol prefix; otherwise the "<protocol>:" prefix (when present and
 * allowed) picks the driver, defaulting to the plain file driver. */
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix,
                                Error **errp)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    /* No protocol prefix (or prefixes disallowed): plain file access */
    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return &bdrv_file;
    }

    /* Extract the "<protocol>" part before the ':' (truncated to fit) */
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }

    error_setg(errp, "Unknown protocol '%s'", protocol);
    return NULL;
}
726
Markus Armbrusterc6684242014-11-20 16:27:10 +0100727/*
728 * Guess image format by probing its contents.
729 * This is not a good idea when your image is raw (CVE-2008-2004), but
730 * we do it anyway for backward compatibility.
731 *
732 * @buf contains the image's first @buf_size bytes.
Kevin Wolf7cddd372014-11-20 16:27:11 +0100733 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
734 * but can be smaller if the image file is smaller)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100735 * @filename is its filename.
736 *
737 * For all block drivers, call the bdrv_probe() method to get its
738 * probing score.
739 * Return the first block driver with the highest probing score.
740 */
Kevin Wolf38f3ef52014-11-20 16:27:12 +0100741BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
742 const char *filename)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100743{
744 int score_max = 0, score;
745 BlockDriver *drv = NULL, *d;
746
747 QLIST_FOREACH(d, &bdrv_drivers, list) {
748 if (d->bdrv_probe) {
749 score = d->bdrv_probe(buf, buf_size, filename);
750 if (score > score_max) {
751 score_max = score;
752 drv = d;
753 }
754 }
755 }
756
757 return drv;
758}
759
/*
 * Probe the image on @bs to determine its format driver.
 *
 * scsi-generic devices and empty/ejected drives short-circuit to the raw
 * driver.  On success *pdrv is set and a non-negative value is returned;
 * on failure *pdrv is NULL, a negative errno is returned and @errp is set.
 */
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    BlockDriver *drv;
    uint8_t buf[BLOCK_PROBE_BUF_SIZE];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        *pdrv = &bdrv_raw;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    /* ret is passed as buf_size — presumably the number of bytes actually
     * read, which may be smaller than sizeof(buf) for tiny images */
    drv = bdrv_probe_all(buf, ret, filename);
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
790
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100791/**
792 * Set the current 'total_sectors' value
Markus Armbruster65a9bb22014-06-26 13:23:17 +0200793 * Return 0 on success, -errno on error.
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100794 */
795static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
796{
797 BlockDriver *drv = bs->drv;
798
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700799 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
800 if (bs->sg)
801 return 0;
802
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100803 /* query actual device if possible, otherwise just trust the hint */
804 if (drv->bdrv_getlength) {
805 int64_t length = drv->bdrv_getlength(bs);
806 if (length < 0) {
807 return length;
808 }
Fam Zheng7e382002013-11-06 19:48:06 +0800809 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100810 }
811
812 bs->total_sectors = hint;
813 return 0;
814}
815
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100816/**
Paolo Bonzini9e8f1832013-02-08 14:06:11 +0100817 * Set open flags for a given discard mode
818 *
819 * Return 0 on success, -1 if the discard mode was invalid.
820 */
821int bdrv_parse_discard_flags(const char *mode, int *flags)
822{
823 *flags &= ~BDRV_O_UNMAP;
824
825 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
826 /* do nothing */
827 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
828 *flags |= BDRV_O_UNMAP;
829 } else {
830 return -1;
831 }
832
833 return 0;
834}
835
836/**
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100837 * Set open flags for a given cache mode
838 *
839 * Return 0 on success, -1 if the cache mode was invalid.
840 */
841int bdrv_parse_cache_flags(const char *mode, int *flags)
842{
843 *flags &= ~BDRV_O_CACHE_MASK;
844
845 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
846 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100847 } else if (!strcmp(mode, "directsync")) {
848 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100849 } else if (!strcmp(mode, "writeback")) {
850 *flags |= BDRV_O_CACHE_WB;
851 } else if (!strcmp(mode, "unsafe")) {
852 *flags |= BDRV_O_CACHE_WB;
853 *flags |= BDRV_O_NO_FLUSH;
854 } else if (!strcmp(mode, "writethrough")) {
855 /* this is the default */
856 } else {
857 return -1;
858 }
859
860 return 0;
861}
862
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000863/**
864 * The copy-on-read flag is actually a reference count so multiple users may
865 * use the feature without worrying about clobbering its previous state.
866 * Copy-on-read stays enabled until all users have called to disable it.
867 */
868void bdrv_enable_copy_on_read(BlockDriverState *bs)
869{
870 bs->copy_on_read++;
871}
872
873void bdrv_disable_copy_on_read(BlockDriverState *bs)
874{
875 assert(bs->copy_on_read > 0);
876 bs->copy_on_read--;
877}
878
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200879/*
Kevin Wolfb1e6fc02014-05-06 12:11:42 +0200880 * Returns the flags that a temporary snapshot should get, based on the
881 * originally requested flags (the originally requested image will have flags
882 * like a backing file)
883 */
884static int bdrv_temp_snapshot_flags(int flags)
885{
886 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
887}
888
889/*
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200890 * Returns the flags that bs->file should get, based on the given flags for
891 * the parent BDS
892 */
893static int bdrv_inherited_flags(int flags)
894{
895 /* Enable protocol handling, disable format probing for bs->file */
896 flags |= BDRV_O_PROTOCOL;
897
898 /* Our block drivers take care to send flushes and respect unmap policy,
899 * so we can enable both unconditionally on lower layers. */
900 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
901
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200902 /* Clear flags that only apply to the top layer */
Kevin Wolf5669b442014-04-11 21:36:45 +0200903 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200904
905 return flags;
906}
907
Kevin Wolf317fc442014-04-25 13:27:34 +0200908/*
909 * Returns the flags that bs->backing_hd should get, based on the given flags
910 * for the parent BDS
911 */
912static int bdrv_backing_flags(int flags)
913{
914 /* backing files always opened read-only */
915 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
916
917 /* snapshot=on is handled on the top layer */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200918 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
Kevin Wolf317fc442014-04-25 13:27:34 +0200919
920 return flags;
921}
922
Kevin Wolf7b272452012-11-12 17:05:39 +0100923static int bdrv_open_flags(BlockDriverState *bs, int flags)
924{
925 int open_flags = flags | BDRV_O_CACHE_WB;
926
927 /*
928 * Clear flags that are internal to the block layer before opening the
929 * image.
930 */
Kevin Wolf20cca272014-06-04 14:33:27 +0200931 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
Kevin Wolf7b272452012-11-12 17:05:39 +0100932
933 /*
934 * Snapshots should be writable.
935 */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200936 if (flags & BDRV_O_TEMPORARY) {
Kevin Wolf7b272452012-11-12 17:05:39 +0100937 open_flags |= BDRV_O_RDWR;
938 }
939
940 return open_flags;
941}
942
/*
 * Install @node_name on @bs and insert the node into the global graph list.
 *
 * A NULL @node_name is a no-op.  On any validation failure @errp is set and
 * @bs is left untouched: the name must be well-formed, must not collide with
 * a block backend (device) id, and must not duplicate an existing node name.
 */
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* Check for empty string or invalid characters */
    if (!id_wellformed(node_name)) {
        error_setg(errp, "Invalid node name");
        return;
    }

    /* takes care of avoiding namespaces collisions */
    if (blk_by_name(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicates node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}
974
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs      fresh BlockDriverState to open
 * @file    already-opened protocol-level BDS for @bs, or NULL
 * @options option QDict; "filename" and "node-name" are consumed here, the
 *          remainder is passed to the driver's open callback
 * @drv     driver to use; must not be NULL
 *
 * Returns 0 on success, negative errno (with @errp set) on failure.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    /* Defaults; drivers may refine alignment/limits during their open */
    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                   ? "Driver '%s' can only be used for read-only devices"
                   : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's error; otherwise synthesize one from errno */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    /* Undo the partial initialization; @file stays owned by the caller */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
1118
/*
 * Parse a "json:{...}" pseudo-protocol filename into a flattened options
 * QDict.
 *
 * @filename must start with "json:" (asserted).  Returns a new QDict owned
 * by the caller, or NULL with @errp set if the payload is not valid JSON or
 * not a JSON object.
 */
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    /* nested dicts become dotted keys, matching the -drive option syntax */
    qdict_flatten(options);

    return options;
}
1145
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 *
 * On success the "driver" (and possibly "filename") entries of *options are
 * filled in; *pfilename is reset to NULL if a json: pseudo-protocol filename
 * was consumed.  Returns 0 on success, negative errno (with @errp set) on
 * failure.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    /* set when the legacy filename was injected into *options below; only
     * then may the driver's bdrv_parse_filename() rewrite it */
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        /* caller-supplied driver must not conflict with an explicit option */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* at the protocol level a driver has been resolved by now; at the format
     * level it may legitimately still be unknown (probing happens later) */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
1237
/*
 * Attach @backing_hd as the backing file of @bs, or detach the current one
 * when @backing_hd is NULL.
 *
 * An op blocker is installed on the new backing node (everything blocked
 * except committing into it) and removed from the old one.  bs->backing_file
 * and bs->backing_format are updated to match, and the I/O limits of @bs are
 * refreshed.  NOTE(review): reference counting of @backing_hd appears to be
 * the caller's responsibility — confirm against callers.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}
1268
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 *
 * Returns 0 on success (including the no-op cases: backing file already
 * open, or none configured), negative errno with @errp set otherwise.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    if (bs->backing_hd != NULL) {
        /* already open: consume the options reference and succeed */
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* the options fully specify the file; no filename needed */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* no backing file configured at all */
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
                                       &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            QDECREF(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    /* the recorded backing format acts as a default driver */
    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
    }

    assert(bs->backing_hd == NULL);
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), NULL, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}
1343
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001344/*
Max Reitzda557aa2013-12-20 19:28:11 +01001345 * Opens a disk image whose options are given as BlockdevRef in another block
1346 * device's options.
1347 *
Max Reitzda557aa2013-12-20 19:28:11 +01001348 * If allow_none is true, no image will be opened if filename is NULL and no
1349 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1350 *
1351 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1352 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1353 * itself, all options starting with "${bdref_key}." are considered part of the
1354 * BlockdevRef.
1355 *
1356 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001357 *
1358 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001359 */
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                    QDict *options, const char *bdref_key, int flags,
                    bool allow_none, Error **errp)
{
    QDict *image_options;
    int ret;
    char *bdref_key_dot;
    const char *reference;

    /* same contract as bdrv_open(): *pbs must start out NULL */
    assert(pbs);
    assert(*pbs == NULL);

    /* collect all "<bdref_key>.*" entries into their own QDict */
    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
    g_free(bdref_key_dot);

    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
        /* nothing to open: only acceptable when the image is optional */
        if (allow_none) {
            ret = 0;
        } else {
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
            ret = -EINVAL;
        }
        QDECREF(image_options);
        goto done;
    }

    ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);

done:
    /* the BlockdevRef itself is consumed in every case */
    qdict_del(options, bdref_key);
    return ret;
}
1395
Chen Gang6b8aeca2014-06-23 23:28:23 +08001396int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001397{
1398 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001399 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001400 int64_t total_size;
Chunyan Liu83d05212014-06-05 17:20:51 +08001401 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001402 QDict *snapshot_options;
1403 BlockDriverState *bs_snapshot;
1404 Error *local_err;
1405 int ret;
1406
1407 /* if snapshot, we create a temporary backing file and open it
1408 instead of opening 'filename' directly */
1409
1410 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001411 total_size = bdrv_getlength(bs);
1412 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001413 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001414 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001415 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001416 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001417
1418 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001419 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001420 if (ret < 0) {
1421 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001422 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001423 }
1424
Max Reitzef810432014-12-02 18:32:42 +01001425 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001426 &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01001427 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
Max Reitzef810432014-12-02 18:32:42 +01001428 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001429 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001430 if (ret < 0) {
1431 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1432 "'%s': %s", tmp_filename,
1433 error_get_pretty(local_err));
1434 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001435 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001436 }
1437
1438 /* Prepare a new options QDict for the temporary file */
1439 snapshot_options = qdict_new();
1440 qdict_put(snapshot_options, "file.driver",
1441 qstring_from_str("file"));
1442 qdict_put(snapshot_options, "file.filename",
1443 qstring_from_str(tmp_filename));
1444
Markus Armbrustere4e99862014-10-07 13:59:03 +02001445 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001446
1447 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Max Reitzef810432014-12-02 18:32:42 +01001448 flags, &bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001449 if (ret < 0) {
1450 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001451 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001452 }
1453
1454 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001455
1456out:
1457 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001458 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001459}
1460
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
 *
 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
 * If it is not NULL, the referenced BDS will be reused.
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given,
 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
 *
 * Returns 0 on success; on failure, a negative errno value is returned and
 * errp is set.  Note the two distinct failure labels below: 'fail' is for
 * errors before the image was successfully opened, 'close_and_fail' for
 * errors after, when the BDS additionally has to be closed again.
 */
int bdrv_open(BlockDriverState **pbs, const char *filename,
              const char *reference, QDict *options, int flags,
              BlockDriver *drv, Error **errp)
{
    int ret;
    BlockDriverState *file = NULL, *bs;
    const char *drvname;
    Error *local_err = NULL;
    int snapshot_flags = 0;

    assert(pbs);

    /* Opening by reference: look up an existing, named BDS and take a
     * reference on it instead of opening anything new. */
    if (reference) {
        bool options_non_empty = options ? qdict_size(options) : false;
        QDECREF(options);

        if (*pbs) {
            error_setg(errp, "Cannot reuse an existing BDS when referencing "
                       "another block device");
            return -EINVAL;
        }

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

    if (*pbs) {
        bs = *pbs;
    } else {
        bs = bdrv_new();
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
    if (local_err) {
        goto fail;
    }

    /* Find the right image format driver */
    drv = NULL;
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        /* The option has been consumed; remove it so the leftover-options
         * check at the end does not see it. */
        qdict_del(options, "driver");
        if (!drv) {
            error_setg(errp, "Unknown driver: '%s'", drvname);
            ret = -EINVAL;
            goto fail;
        }
    }

    assert(drvname || !(flags & BDRV_O_PROTOCOL));
    if (drv && !drv->bdrv_file_open) {
        /* If the user explicitly wants a format driver here, we'll need to add
         * another layer for the protocol in bs->file */
        flags &= ~BDRV_O_PROTOCOL;
    }

    /* bs->options keeps the original dict; work on a shallow clone so
     * consumed entries can be deleted without disturbing bs->options. */
    bs->options = options;
    options = qdict_clone_shallow(options);

    /* Open image file without format layer */
    if ((flags & BDRV_O_PROTOCOL) == 0) {
        if (flags & BDRV_O_RDWR) {
            flags |= BDRV_O_ALLOW_RDWR;
        }
        if (flags & BDRV_O_SNAPSHOT) {
            /* Remember the flags for the temporary overlay created below;
             * the image itself is opened with backing-file flags. */
            snapshot_flags = bdrv_temp_snapshot_flags(flags);
            flags = bdrv_backing_flags(flags);
        }

        assert(file == NULL);
        ret = bdrv_open_image(&file, filename, options, "file",
                              bdrv_inherited_flags(flags),
                              true, &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Image format probing */
    bs->probed = !drv;
    if (!drv && file) {
        ret = find_image_format(file, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        ret = -EINVAL;
        goto fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* The format layer may have taken its own reference to the protocol
     * BDS (or replaced it); drop ours if it is no longer bs->file. */
    if (file && (bs->file != file)) {
        bdrv_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        QDict *backing_options;

        qdict_extract_subqdict(options, &backing_options, "backing.");
        ret = bdrv_open_backing_file(bs, backing_options, &local_err);
        if (ret < 0) {
            goto close_and_fail;
        }
    }

    bdrv_refresh_filename(bs);

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
        if (local_err) {
            goto close_and_fail;
        }
    }

    /* Check if any unknown options were used */
    if (options && (qdict_size(options) != 0)) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp, "Block format '%s' used by device '%s' doesn't "
                       "support the option '%s'", drv->format_name,
                       bdrv_get_device_name(bs), entry->key);
        }

        ret = -EINVAL;
        goto close_and_fail;
    }

    if (!bdrv_key_required(bs)) {
        if (bs->blk) {
            blk_dev_change_media_cb(bs->blk, true);
        }
    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
               && !runstate_check(RUN_STATE_INMIGRATE)
               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
        error_setg(errp,
                   "Guest must be stopped for opening of encrypted image");
        ret = -EBUSY;
        goto close_and_fail;
    }

    QDECREF(options);
    *pbs = bs;
    return 0;

fail:
    if (file != NULL) {
        bdrv_unref(file);
    }
    QDECREF(bs->options);
    QDECREF(options);
    bs->options = NULL;
    if (!*pbs) {
        /* If *pbs is NULL, a new BDS has been created in this function and
           needs to be freed now. Otherwise, it does not need to be closed,
           since it has not really been opened yet. */
        bdrv_unref(bs);
    }
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;

close_and_fail:
    /* See fail path, but now the BDS has to be always closed */
    if (*pbs) {
        bdrv_close(bs);
    } else {
        bdrv_unref(bs);
    }
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}
1681
/* One element of a BlockReopenQueue: a single BDS whose reopen may have
 * been prepared but not yet committed or aborted. */
typedef struct BlockReopenQueueEntry {
    bool prepared;    /* set once bdrv_reopen_prepare() succeeded for state */
    BDRVReopenState state;    /* staged reopen parameters (bs, flags, ...) */
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;    /* queue linkage */
} BlockReopenQueueEntry;
1687
1688/*
1689 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1690 * reopen of multiple devices.
1691 *
1692 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1693 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1694 * be created and initialized. This newly created BlockReopenQueue should be
1695 * passed back in for subsequent calls that are intended to be of the same
1696 * atomic 'set'.
1697 *
1698 * bs is the BlockDriverState to add to the reopen queue.
1699 *
1700 * flags contains the open flags for the associated bs
1701 *
1702 * returns a pointer to bs_queue, which is either the newly allocated
1703 * bs_queue, or the existing bs_queue being used.
1704 *
1705 */
1706BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1707 BlockDriverState *bs, int flags)
1708{
1709 assert(bs != NULL);
1710
1711 BlockReopenQueueEntry *bs_entry;
1712 if (bs_queue == NULL) {
1713 bs_queue = g_new0(BlockReopenQueue, 1);
1714 QSIMPLEQ_INIT(bs_queue);
1715 }
1716
Kevin Wolff1f25a22014-04-25 19:04:55 +02001717 /* bdrv_open() masks this flag out */
1718 flags &= ~BDRV_O_PROTOCOL;
1719
Jeff Codye971aa12012-09-20 15:13:19 -04001720 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001721 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001722 }
1723
1724 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1725 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1726
1727 bs_entry->state.bs = bs;
1728 bs_entry->state.flags = flags;
1729
1730 return bs_queue;
1731}
1732
1733/*
1734 * Reopen multiple BlockDriverStates atomically & transactionally.
1735 *
1736 * The queue passed in (bs_queue) must have been built up previous
1737 * via bdrv_reopen_queue().
1738 *
1739 * Reopens all BDS specified in the queue, with the appropriate
1740 * flags. All devices are prepared for reopen, and failure of any
1741 * device will cause all device changes to be abandonded, and intermediate
1742 * data cleaned up.
1743 *
1744 * If all devices prepare successfully, then the changes are committed
1745 * to all devices.
1746 *
1747 */
1748int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1749{
1750 int ret = -1;
1751 BlockReopenQueueEntry *bs_entry, *next;
1752 Error *local_err = NULL;
1753
1754 assert(bs_queue != NULL);
1755
1756 bdrv_drain_all();
1757
1758 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1759 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1760 error_propagate(errp, local_err);
1761 goto cleanup;
1762 }
1763 bs_entry->prepared = true;
1764 }
1765
1766 /* If we reach this point, we have success and just need to apply the
1767 * changes
1768 */
1769 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1770 bdrv_reopen_commit(&bs_entry->state);
1771 }
1772
1773 ret = 0;
1774
1775cleanup:
1776 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1777 if (ret && bs_entry->prepared) {
1778 bdrv_reopen_abort(&bs_entry->state);
1779 }
1780 g_free(bs_entry);
1781 }
1782 g_free(bs_queue);
1783 return ret;
1784}
1785
1786
1787/* Reopen a single BlockDriverState with the specified flags. */
1788int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1789{
1790 int ret = -1;
1791 Error *local_err = NULL;
1792 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1793
1794 ret = bdrv_reopen_multiple(queue, &local_err);
1795 if (local_err != NULL) {
1796 error_propagate(errp, local_err);
1797 }
1798 return ret;
1799}
1800
1801
1802/*
1803 * Prepares a BlockDriverState for reopen. All changes are staged in the
1804 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1805 * the block driver layer .bdrv_reopen_prepare()
1806 *
1807 * bs is the BlockDriverState to reopen
1808 * flags are the new open flags
1809 * queue is the reopen queue
1810 *
1811 * Returns 0 on success, non-zero on error. On error errp will be set
1812 * as well.
1813 *
1814 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1815 * It is the responsibility of the caller to then call the abort() or
1816 * commit() for any other BDS that have been left in a prepare() state
1817 *
1818 */
1819int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1820 Error **errp)
1821{
1822 int ret = -1;
1823 Error *local_err = NULL;
1824 BlockDriver *drv;
1825
1826 assert(reopen_state != NULL);
1827 assert(reopen_state->bs->drv != NULL);
1828 drv = reopen_state->bs->drv;
1829
1830 /* if we are to stay read-only, do not allow permission change
1831 * to r/w */
1832 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1833 reopen_state->flags & BDRV_O_RDWR) {
Alberto Garcia81e5f782015-04-08 12:29:19 +03001834 error_setg(errp, "Node '%s' is read only",
1835 bdrv_get_device_or_node_name(reopen_state->bs));
Jeff Codye971aa12012-09-20 15:13:19 -04001836 goto error;
1837 }
1838
1839
1840 ret = bdrv_flush(reopen_state->bs);
1841 if (ret) {
1842 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1843 strerror(-ret));
1844 goto error;
1845 }
1846
1847 if (drv->bdrv_reopen_prepare) {
1848 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1849 if (ret) {
1850 if (local_err != NULL) {
1851 error_propagate(errp, local_err);
1852 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001853 error_setg(errp, "failed while preparing to reopen image '%s'",
1854 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001855 }
1856 goto error;
1857 }
1858 } else {
1859 /* It is currently mandatory to have a bdrv_reopen_prepare()
1860 * handler for each supported drv. */
Alberto Garcia81e5f782015-04-08 12:29:19 +03001861 error_setg(errp, "Block format '%s' used by node '%s' "
1862 "does not support reopening files", drv->format_name,
1863 bdrv_get_device_or_node_name(reopen_state->bs));
Jeff Codye971aa12012-09-20 15:13:19 -04001864 ret = -1;
1865 goto error;
1866 }
1867
1868 ret = 0;
1869
1870error:
1871 return ret;
1872}
1873
1874/*
1875 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1876 * makes them final by swapping the staging BlockDriverState contents into
1877 * the active BlockDriverState contents.
1878 */
1879void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1880{
1881 BlockDriver *drv;
1882
1883 assert(reopen_state != NULL);
1884 drv = reopen_state->bs->drv;
1885 assert(drv != NULL);
1886
1887 /* If there are any driver level actions to take */
1888 if (drv->bdrv_reopen_commit) {
1889 drv->bdrv_reopen_commit(reopen_state);
1890 }
1891
1892 /* set BDS specific flags now */
1893 reopen_state->bs->open_flags = reopen_state->flags;
1894 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1895 BDRV_O_CACHE_WB);
1896 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001897
Kevin Wolf3baca892014-07-16 17:48:16 +02001898 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001899}
1900
1901/*
1902 * Abort the reopen, and delete and free the staged changes in
1903 * reopen_state
1904 */
1905void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1906{
1907 BlockDriver *drv;
1908
1909 assert(reopen_state != NULL);
1910 drv = reopen_state->bs->drv;
1911 assert(drv != NULL);
1912
1913 if (drv->bdrv_reopen_abort) {
1914 drv->bdrv_reopen_abort(reopen_state);
1915 }
1916}
1917
1918
/*
 * Close a BlockDriverState: cancel any running job, quiesce all I/O, tear
 * down the driver state and drop the protocol/backing references.  The BDS
 * object itself is not freed here.
 */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        /* Detach the backing file first so it is not touched by the
         * driver's close callback below */
        if (bs->backing_hd) {
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        /* Reset all per-image state so the BDS can be reused for a
         * subsequent open */
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;

        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    if (bs->blk) {
        blk_dev_change_media_cb(bs->blk, false);
    }

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }

    /* Free all registered AIO-context-change notifiers */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}
1974
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001975void bdrv_close_all(void)
1976{
1977 BlockDriverState *bs;
1978
Benoît Canetdc364f42014-01-23 21:31:32 +01001979 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001980 AioContext *aio_context = bdrv_get_aio_context(bs);
1981
1982 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001983 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001984 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001985 }
1986}
1987
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001988/* Check if any requests are in-flight (including throttled requests) */
1989static bool bdrv_requests_pending(BlockDriverState *bs)
1990{
1991 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1992 return true;
1993 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001994 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1995 return true;
1996 }
1997 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001998 return true;
1999 }
2000 if (bs->file && bdrv_requests_pending(bs->file)) {
2001 return true;
2002 }
2003 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
2004 return true;
2005 }
2006 return false;
2007}
2008
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01002009static bool bdrv_drain_one(BlockDriverState *bs)
2010{
2011 bool bs_busy;
2012
2013 bdrv_flush_io_queue(bs);
2014 bdrv_start_throttled_reqs(bs);
2015 bs_busy = bdrv_requests_pending(bs);
2016 bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
2017 return bs_busy;
2018}
2019
2020/*
2021 * Wait for pending requests to complete on a single BlockDriverState subtree
2022 *
2023 * See the warning in bdrv_drain_all(). This function can only be called if
2024 * you are sure nothing can generate I/O because you have op blockers
2025 * installed.
2026 *
2027 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
2028 * AioContext.
2029 */
2030void bdrv_drain(BlockDriverState *bs)
2031{
2032 while (bdrv_drain_one(bs)) {
2033 /* Keep iterating */
2034 }
2035}
2036
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002037/*
2038 * Wait for pending requests to complete across all BlockDriverStates
2039 *
2040 * This function does not flush data to disk, use bdrv_flush_all() for that
2041 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02002042 *
2043 * Note that completion of an asynchronous I/O operation can trigger any
2044 * number of other I/O operations on other devices---for example a coroutine
2045 * can be arbitrarily complex and a constant flow of I/O can come until the
2046 * coroutine is complete. Because of this, it is not possible to have a
2047 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002048 */
2049void bdrv_drain_all(void)
2050{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02002051 /* Always run first iteration so any pending completion BHs run */
2052 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002053 BlockDriverState *bs;
2054
Fam Zheng69da3b02015-04-03 22:05:19 +08002055 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2056 AioContext *aio_context = bdrv_get_aio_context(bs);
2057
2058 aio_context_acquire(aio_context);
2059 if (bs->job) {
2060 block_job_pause(bs->job);
2061 }
2062 aio_context_release(aio_context);
2063 }
2064
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02002065 while (busy) {
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002066 busy = false;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002067
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002068 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2069 AioContext *aio_context = bdrv_get_aio_context(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002070
2071 aio_context_acquire(aio_context);
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01002072 busy |= bdrv_drain_one(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002073 aio_context_release(aio_context);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002074 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002075 }
Fam Zheng69da3b02015-04-03 22:05:19 +08002076
2077 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2078 AioContext *aio_context = bdrv_get_aio_context(bs);
2079
2080 aio_context_acquire(aio_context);
2081 if (bs->job) {
2082 block_job_resume(bs->job);
2083 }
2084 aio_context_release(aio_context);
2085 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002086}
2087
Benoît Canetdc364f42014-01-23 21:31:32 +01002088/* make a BlockDriverState anonymous by removing from bdrv_state and
2089 * graph_bdrv_state list.
Ryan Harperd22b2f42011-03-29 20:51:47 -05002090 Also, NULL terminate the device_name to prevent double remove */
2091void bdrv_make_anon(BlockDriverState *bs)
2092{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002093 /*
2094 * Take care to remove bs from bdrv_states only when it's actually
2095 * in it. Note that bs->device_list.tqe_prev is initially null,
2096 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
2097 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2098 * resetting it to null on remove.
2099 */
2100 if (bs->device_list.tqe_prev) {
Benoît Canetdc364f42014-01-23 21:31:32 +01002101 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002102 bs->device_list.tqe_prev = NULL;
Ryan Harperd22b2f42011-03-29 20:51:47 -05002103 }
Benoît Canetdc364f42014-01-23 21:31:32 +01002104 if (bs->node_name[0] != '\0') {
2105 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2106 }
2107 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05002108}
2109
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02002110static void bdrv_rebind(BlockDriverState *bs)
2111{
2112 if (bs->drv && bs->drv->bdrv_rebind) {
2113 bs->drv->bdrv_rebind(bs);
2114 }
2115}
2116
/*
 * Copy from bs_src to bs_dest the fields that must stay attached to the
 * device-facing BDS rather than follow the image contents.  Used by
 * bdrv_swap() to move these fields back after the wholesale struct swap.
 */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->guest_block_size = bs_src->guest_block_size;
    bs_dest->copy_on_read = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o throttled req */
    memcpy(&bs_dest->throttle_state,
           &bs_src->throttle_state,
           sizeof(ThrottleState));
    bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
    bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
    bs_dest->io_limits_enabled = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error = bs_src->on_read_error;
    bs_dest->on_write_error = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
    bs_dest->iostatus = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;

    /* reference count */
    bs_dest->refcnt = bs_src->refcnt;

    /* job */
    bs_dest->job = bs_src->job;

    /* keep the same entry in bdrv_states */
    bs_dest->device_list = bs_src->device_list;
    bs_dest->blk = bs_src->blk;

    /* op_blockers is an array of lists; copy the whole array */
    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
           sizeof(bs_dest->op_blockers));
}
2160
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be unattached and shouldn't have anything fancy enabled */
    assert(!bs_new->blk);
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* Swap the two structs wholesale... */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back:
     * the three-way move restores each BDS's device-attached fields from
     * the copy that now holds them (tmp still has bs_new's originals) */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new must remain unattached */
    assert(!bs_new->blk);

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
2223
Jeff Cody8802d1f2012-02-28 15:54:06 -05002224/*
2225 * Add new bs contents at the top of an image chain while the chain is
2226 * live, while keeping required fields on the top layer.
2227 *
2228 * This will modify the BlockDriverState fields, and swap contents
2229 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2230 *
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002231 * bs_new must not be attached to a BlockBackend.
Jeff Codyf6801b82012-03-27 16:30:19 -04002232 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05002233 * This function does not create any image files.
2234 */
2235void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2236{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002237 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002238
2239 /* The contents of 'tmp' will become bs_top, as we are
2240 * swapping bs_new and bs_top contents. */
Fam Zheng8d24cce2014-05-23 21:29:45 +08002241 bdrv_set_backing_hd(bs_top, bs_new);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002242}
2243
Fam Zheng4f6fd342013-08-23 09:14:47 +08002244static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00002245{
Paolo Bonzini3e914652012-03-30 13:17:11 +02002246 assert(!bs->job);
Fam Zheng3718d8a2014-05-23 21:29:43 +08002247 assert(bdrv_op_blocker_is_empty(bs));
Fam Zheng4f6fd342013-08-23 09:14:47 +08002248 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08002249 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02002250
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02002251 bdrv_close(bs);
2252
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002253 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05002254 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00002255
Anthony Liguori7267c092011-08-20 22:09:37 -05002256 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00002257}
2258
aliguorie97fc192009-04-21 23:11:50 +00002259/*
2260 * Run consistency checks on an image
2261 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002262 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002263 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002264 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002265 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002266int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002267{
Max Reitz908bcd52014-08-07 22:47:55 +02002268 if (bs->drv == NULL) {
2269 return -ENOMEDIUM;
2270 }
aliguorie97fc192009-04-21 23:11:50 +00002271 if (bs->drv->bdrv_check == NULL) {
2272 return -ENOTSUP;
2273 }
2274
Kevin Wolfe076f332010-06-29 11:43:13 +02002275 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002276 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002277}
2278
Kevin Wolf8a426612010-07-16 17:17:01 +02002279#define COMMIT_BUF_SECTORS 2048
2280
bellard33e39632003-07-06 17:15:21 +00002281/* commit COW file into the raw image */
2282int bdrv_commit(BlockDriverState *bs)
2283{
bellard19cb3732006-08-19 11:45:59 +00002284 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002285 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002286 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002287 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002288 uint8_t *buf = NULL;
bellard33e39632003-07-06 17:15:21 +00002289
bellard19cb3732006-08-19 11:45:59 +00002290 if (!drv)
2291 return -ENOMEDIUM;
Liu Yuan6bb45152014-09-01 13:35:21 +08002292
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002293 if (!bs->backing_hd) {
2294 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002295 }
2296
Fam Zhengbb000212014-09-11 13:14:00 +08002297 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2298 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002299 return -EBUSY;
2300 }
2301
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002302 ro = bs->backing_hd->read_only;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002303 open_flags = bs->backing_hd->open_flags;
2304
2305 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002306 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2307 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002308 }
bellard33e39632003-07-06 17:15:21 +00002309 }
bellardea2384d2004-08-01 21:59:26 +00002310
Jeff Cody72706ea2014-01-24 09:02:35 -05002311 length = bdrv_getlength(bs);
2312 if (length < 0) {
2313 ret = length;
2314 goto ro_cleanup;
2315 }
2316
2317 backing_length = bdrv_getlength(bs->backing_hd);
2318 if (backing_length < 0) {
2319 ret = backing_length;
2320 goto ro_cleanup;
2321 }
2322
2323 /* If our top snapshot is larger than the backing file image,
2324 * grow the backing file image if possible. If not possible,
2325 * we must return an error */
2326 if (length > backing_length) {
2327 ret = bdrv_truncate(bs->backing_hd, length);
2328 if (ret < 0) {
2329 goto ro_cleanup;
2330 }
2331 }
2332
2333 total_sectors = length >> BDRV_SECTOR_BITS;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002334
2335 /* qemu_try_blockalign() for bs will choose an alignment that works for
2336 * bs->backing_hd as well, so no need to compare the alignment manually. */
2337 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2338 if (buf == NULL) {
2339 ret = -ENOMEM;
2340 goto ro_cleanup;
2341 }
bellardea2384d2004-08-01 21:59:26 +00002342
Kevin Wolf8a426612010-07-16 17:17:01 +02002343 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002344 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2345 if (ret < 0) {
2346 goto ro_cleanup;
2347 }
2348 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002349 ret = bdrv_read(bs, sector, buf, n);
2350 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002351 goto ro_cleanup;
2352 }
2353
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002354 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2355 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002356 goto ro_cleanup;
2357 }
bellardea2384d2004-08-01 21:59:26 +00002358 }
2359 }
bellard95389c82005-12-18 18:28:15 +00002360
Christoph Hellwig1d449522010-01-17 12:32:30 +01002361 if (drv->bdrv_make_empty) {
2362 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002363 if (ret < 0) {
2364 goto ro_cleanup;
2365 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002366 bdrv_flush(bs);
2367 }
bellard95389c82005-12-18 18:28:15 +00002368
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002369 /*
2370 * Make sure all data we wrote to the backing device is actually
2371 * stable on disk.
2372 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002373 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002374 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002375 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002376
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002377 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002378ro_cleanup:
Kevin Wolf857d4f42014-05-20 13:16:51 +02002379 qemu_vfree(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002380
2381 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002382 /* ignoring error return here */
2383 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002384 }
2385
Christoph Hellwig1d449522010-01-17 12:32:30 +01002386 return ret;
bellard33e39632003-07-06 17:15:21 +00002387}
2388
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002389int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002390{
2391 BlockDriverState *bs;
2392
Benoît Canetdc364f42014-01-23 21:31:32 +01002393 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002394 AioContext *aio_context = bdrv_get_aio_context(bs);
2395
2396 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002397 if (bs->drv && bs->backing_hd) {
2398 int ret = bdrv_commit(bs);
2399 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002400 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002401 return ret;
2402 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002403 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002404 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002405 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002406 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002407}
2408
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002409/**
2410 * Remove an active request from the tracked requests list
2411 *
2412 * This function should be called when a tracked request is completing.
2413 */
2414static void tracked_request_end(BdrvTrackedRequest *req)
2415{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002416 if (req->serialising) {
2417 req->bs->serialising_in_flight--;
2418 }
2419
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002420 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002421 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002422}
2423
2424/**
2425 * Add an active request to the tracked requests list
2426 */
2427static void tracked_request_begin(BdrvTrackedRequest *req,
2428 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002429 int64_t offset,
2430 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002431{
2432 *req = (BdrvTrackedRequest){
2433 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002434 .offset = offset,
2435 .bytes = bytes,
2436 .is_write = is_write,
2437 .co = qemu_coroutine_self(),
2438 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002439 .overlap_offset = offset,
2440 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002441 };
2442
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002443 qemu_co_queue_init(&req->wait_queue);
2444
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002445 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2446}
2447
Kevin Wolfe96126f2014-02-08 10:42:18 +01002448static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002449{
Kevin Wolf73271452013-12-04 17:08:50 +01002450 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002451 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2452 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002453
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002454 if (!req->serialising) {
2455 req->bs->serialising_in_flight++;
2456 req->serialising = true;
2457 }
Kevin Wolf73271452013-12-04 17:08:50 +01002458
2459 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2460 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002461}
2462
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002463/**
2464 * Round a region to cluster boundaries
2465 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002466void bdrv_round_to_clusters(BlockDriverState *bs,
2467 int64_t sector_num, int nb_sectors,
2468 int64_t *cluster_sector_num,
2469 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002470{
2471 BlockDriverInfo bdi;
2472
2473 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2474 *cluster_sector_num = sector_num;
2475 *cluster_nb_sectors = nb_sectors;
2476 } else {
2477 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2478 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2479 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2480 nb_sectors, c);
2481 }
2482}
2483
Kevin Wolf73271452013-12-04 17:08:50 +01002484static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002485{
2486 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002487 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002488
Kevin Wolf73271452013-12-04 17:08:50 +01002489 ret = bdrv_get_info(bs, &bdi);
2490 if (ret < 0 || bdi.cluster_size == 0) {
2491 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002492 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002493 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002494 }
2495}
2496
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002497static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002498 int64_t offset, unsigned int bytes)
2499{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002500 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002501 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002502 return false;
2503 }
2504 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002505 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002506 return false;
2507 }
2508 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002509}
2510
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002511static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002512{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002513 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002514 BdrvTrackedRequest *req;
2515 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002516 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002517
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002518 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002519 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002520 }
2521
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002522 do {
2523 retry = false;
2524 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002525 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002526 continue;
2527 }
Kevin Wolf73271452013-12-04 17:08:50 +01002528 if (tracked_request_overlaps(req, self->overlap_offset,
2529 self->overlap_bytes))
2530 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002531 /* Hitting this means there was a reentrant request, for
2532 * example, a block driver issuing nested requests. This must
2533 * never happen since it means deadlock.
2534 */
2535 assert(qemu_coroutine_self() != req->co);
2536
Kevin Wolf64604402013-12-13 13:04:35 +01002537 /* If the request is already (indirectly) waiting for us, or
2538 * will wait for us as soon as it wakes up, then just go on
2539 * (instead of producing a deadlock in the former case). */
2540 if (!req->waiting_for) {
2541 self->waiting_for = req;
2542 qemu_co_queue_wait(&req->wait_queue);
2543 self->waiting_for = NULL;
2544 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002545 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002546 break;
2547 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002548 }
2549 }
2550 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002551
2552 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002553}
2554
Kevin Wolf756e6732010-01-12 12:55:17 +01002555/*
2556 * Return values:
2557 * 0 - success
2558 * -EINVAL - backing format specified, but no file
2559 * -ENOSPC - can't update the backing file because no space is left in the
2560 * image file header
2561 * -ENOTSUP - format driver doesn't support changing the backing file
2562 */
2563int bdrv_change_backing_file(BlockDriverState *bs,
2564 const char *backing_file, const char *backing_fmt)
2565{
2566 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002567 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002568
Paolo Bonzini5f377792012-04-12 14:01:01 +02002569 /* Backing file format doesn't make sense without a backing file */
2570 if (backing_fmt && !backing_file) {
2571 return -EINVAL;
2572 }
2573
Kevin Wolf756e6732010-01-12 12:55:17 +01002574 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002575 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002576 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002577 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002578 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002579
2580 if (ret == 0) {
2581 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2582 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2583 }
2584 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002585}
2586
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002587/*
2588 * Finds the image layer in the chain that has 'bs' as its backing file.
2589 *
2590 * active is the current topmost image.
2591 *
2592 * Returns NULL if bs is not found in active's image chain,
2593 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002594 *
2595 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002596 */
2597BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2598 BlockDriverState *bs)
2599{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002600 while (active && bs != active->backing_hd) {
2601 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002602 }
2603
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002604 return active;
2605}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002606
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002607/* Given a BDS, searches for the base layer. */
2608BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2609{
2610 return bdrv_find_overlay(bs, NULL);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002611}
2612
/* List node used by bdrv_drop_intermediate() to remember the BDSes that
 * are to be unlinked and unref'd once the chain has been re-linked. */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;                          /* BDS queued for removal */
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;   /* states_to_delete link */
} BlkIntermediateStates;
2617
2618
2619/*
2620 * Drops images above 'base' up to and including 'top', and sets the image
2621 * above 'top' to have base as its backing file.
2622 *
2623 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2624 * information in 'bs' can be properly updated.
2625 *
2626 * E.g., this will convert the following chain:
2627 * bottom <- base <- intermediate <- top <- active
2628 *
2629 * to
2630 *
2631 * bottom <- base <- active
2632 *
2633 * It is allowed for bottom==base, in which case it converts:
2634 *
2635 * base <- intermediate <- top <- active
2636 *
2637 * to
2638 *
2639 * base <- active
2640 *
Jeff Cody54e26902014-06-25 15:40:10 -04002641 * If backing_file_str is non-NULL, it will be used when modifying top's
2642 * overlay image metadata.
2643 *
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002644 * Error conditions:
2645 * if active == top, that is considered an error
2646 *
2647 */
2648int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
Jeff Cody54e26902014-06-25 15:40:10 -04002649 BlockDriverState *base, const char *backing_file_str)
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002650{
2651 BlockDriverState *intermediate;
2652 BlockDriverState *base_bs = NULL;
2653 BlockDriverState *new_top_bs = NULL;
2654 BlkIntermediateStates *intermediate_state, *next;
2655 int ret = -EIO;
2656
2657 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2658 QSIMPLEQ_INIT(&states_to_delete);
2659
2660 if (!top->drv || !base->drv) {
2661 goto exit;
2662 }
2663
2664 new_top_bs = bdrv_find_overlay(active, top);
2665
2666 if (new_top_bs == NULL) {
2667 /* we could not find the image above 'top', this is an error */
2668 goto exit;
2669 }
2670
2671 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2672 * to do, no intermediate images */
2673 if (new_top_bs->backing_hd == base) {
2674 ret = 0;
2675 goto exit;
2676 }
2677
2678 intermediate = top;
2679
2680 /* now we will go down through the list, and add each BDS we find
2681 * into our deletion queue, until we hit the 'base'
2682 */
2683 while (intermediate) {
Markus Armbruster5839e532014-08-19 10:31:08 +02002684 intermediate_state = g_new0(BlkIntermediateStates, 1);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002685 intermediate_state->bs = intermediate;
2686 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2687
2688 if (intermediate->backing_hd == base) {
2689 base_bs = intermediate->backing_hd;
2690 break;
2691 }
2692 intermediate = intermediate->backing_hd;
2693 }
2694 if (base_bs == NULL) {
2695 /* something went wrong, we did not end at the base. safely
2696 * unravel everything, and exit with error */
2697 goto exit;
2698 }
2699
2700 /* success - we can delete the intermediate states, and link top->base */
Jeff Cody54e26902014-06-25 15:40:10 -04002701 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2702 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002703 base_bs->drv ? base_bs->drv->format_name : "");
2704 if (ret) {
2705 goto exit;
2706 }
Fam Zheng920beae2014-05-23 21:29:46 +08002707 bdrv_set_backing_hd(new_top_bs, base_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002708
2709 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2710 /* so that bdrv_close() does not recursively close the chain */
Fam Zheng920beae2014-05-23 21:29:46 +08002711 bdrv_set_backing_hd(intermediate_state->bs, NULL);
Fam Zheng4f6fd342013-08-23 09:14:47 +08002712 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002713 }
2714 ret = 0;
2715
2716exit:
2717 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2718 g_free(intermediate_state);
2719 }
2720 return ret;
2721}
2722
2723
aliguori71d07702009-03-03 17:37:16 +00002724static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2725 size_t size)
2726{
Peter Lieven75af1f32015-02-06 11:54:11 +01002727 if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002728 return -EIO;
2729 }
2730
Max Reitzc0191e72015-02-05 13:58:24 -05002731 if (!bdrv_is_inserted(bs)) {
aliguori71d07702009-03-03 17:37:16 +00002732 return -ENOMEDIUM;
Max Reitzc0191e72015-02-05 13:58:24 -05002733 }
aliguori71d07702009-03-03 17:37:16 +00002734
Max Reitzc0191e72015-02-05 13:58:24 -05002735 if (offset < 0) {
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002736 return -EIO;
Max Reitzc0191e72015-02-05 13:58:24 -05002737 }
aliguori71d07702009-03-03 17:37:16 +00002738
2739 return 0;
2740}
2741
2742static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2743 int nb_sectors)
2744{
Peter Lieven75af1f32015-02-06 11:54:11 +01002745 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002746 return -EIO;
2747 }
2748
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002749 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2750 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002751}
2752
/* Bookkeeping for a synchronous read/write carried out via a coroutine
 * (see bdrv_prwv_co() / bdrv_rw_co_entry()). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t offset;           /* byte offset of the request */
    QEMUIOVector *qiov;       /* data buffers; qiov->size is the length */
    bool is_write;            /* true: write request, false: read */
    int ret;                  /* completion status; NOT_DONE while pending */
    BdrvRequestFlags flags;   /* forwarded to bdrv_co_do_p{read,write}v */
} RwCo;
2761
2762static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2763{
2764 RwCo *rwco = opaque;
2765
2766 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002767 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2768 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002769 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002770 } else {
2771 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2772 rwco->qiov->size, rwco->qiov,
2773 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002774 }
2775}
2776
2777/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002778 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002779 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002780static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2781 QEMUIOVector *qiov, bool is_write,
2782 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002783{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002784 Coroutine *co;
2785 RwCo rwco = {
2786 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002787 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002788 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002789 .is_write = is_write,
2790 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002791 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002792 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002793
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002794 /**
2795 * In sync call context, when the vcpu is blocked, this throttling timer
2796 * will not fire; so the I/O throttling function has to be disabled here
2797 * if it has been enabled.
2798 */
2799 if (bs->io_limits_enabled) {
2800 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2801 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2802 bdrv_io_limits_disable(bs);
2803 }
2804
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002805 if (qemu_in_coroutine()) {
2806 /* Fast-path if already in coroutine context */
2807 bdrv_rw_co_entry(&rwco);
2808 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002809 AioContext *aio_context = bdrv_get_aio_context(bs);
2810
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002811 co = qemu_coroutine_create(bdrv_rw_co_entry);
2812 qemu_coroutine_enter(co, &rwco);
2813 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002814 aio_poll(aio_context, true);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002815 }
2816 }
2817 return rwco.ret;
2818}
2819
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002820/*
2821 * Process a synchronous request using coroutines
2822 */
2823static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002824 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002825{
2826 QEMUIOVector qiov;
2827 struct iovec iov = {
2828 .iov_base = (void *)buf,
2829 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2830 };
2831
Peter Lieven75af1f32015-02-06 11:54:11 +01002832 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolfda15ee52014-04-14 15:39:36 +02002833 return -EINVAL;
2834 }
2835
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002836 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002837 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2838 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002839}
2840
bellard19cb3732006-08-19 11:45:59 +00002841/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002842int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002843 uint8_t *buf, int nb_sectors)
2844{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002845 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002846}
2847
Markus Armbruster07d27a42012-06-29 17:34:29 +02002848/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2849int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2850 uint8_t *buf, int nb_sectors)
2851{
2852 bool enabled;
2853 int ret;
2854
2855 enabled = bs->io_limits_enabled;
2856 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002857 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002858 bs->io_limits_enabled = enabled;
2859 return ret;
2860}
2861
ths5fafdf22007-09-16 21:08:06 +00002862/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002863 -EIO generic I/O error (may happen for all errors)
2864 -ENOMEDIUM No media inserted.
2865 -EINVAL Invalid sector number or nb_sectors
2866 -EACCES Trying to write a read-only device
2867*/
ths5fafdf22007-09-16 21:08:06 +00002868int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002869 const uint8_t *buf, int nb_sectors)
2870{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002871 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002872}
2873
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002874int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2875 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002876{
2877 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002878 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002879}
2880
Peter Lievend75cbb52013-10-24 12:07:03 +02002881/*
2882 * Completely zero out a block device with the help of bdrv_write_zeroes.
2883 * The operation is sped up by checking the block status and only writing
2884 * zeroes to the device if they currently do not return zeroes. Optional
2885 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2886 *
2887 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2888 */
2889int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2890{
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002891 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
Peter Lievend75cbb52013-10-24 12:07:03 +02002892 int n;
2893
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002894 target_sectors = bdrv_nb_sectors(bs);
2895 if (target_sectors < 0) {
2896 return target_sectors;
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002897 }
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002898
Peter Lievend75cbb52013-10-24 12:07:03 +02002899 for (;;) {
Peter Lieven75af1f32015-02-06 11:54:11 +01002900 nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
Peter Lievend75cbb52013-10-24 12:07:03 +02002901 if (nb_sectors <= 0) {
2902 return 0;
2903 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002904 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002905 if (ret < 0) {
2906 error_report("error getting block status at sector %" PRId64 ": %s",
2907 sector_num, strerror(-ret));
2908 return ret;
2909 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002910 if (ret & BDRV_BLOCK_ZERO) {
2911 sector_num += n;
2912 continue;
2913 }
2914 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2915 if (ret < 0) {
2916 error_report("error writing zeroes at sector %" PRId64 ": %s",
2917 sector_num, strerror(-ret));
2918 return ret;
2919 }
2920 sector_num += n;
2921 }
2922}
2923
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002924int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002925{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002926 QEMUIOVector qiov;
2927 struct iovec iov = {
2928 .iov_base = (void *)buf,
2929 .iov_len = bytes,
2930 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002931 int ret;
bellard83f64092006-08-01 16:21:11 +00002932
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002933 if (bytes < 0) {
2934 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002935 }
2936
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002937 qemu_iovec_init_external(&qiov, &iov, 1);
2938 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2939 if (ret < 0) {
2940 return ret;
bellard83f64092006-08-01 16:21:11 +00002941 }
2942
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002943 return bytes;
bellard83f64092006-08-01 16:21:11 +00002944}
2945
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002946int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002947{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002948 int ret;
bellard83f64092006-08-01 16:21:11 +00002949
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002950 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2951 if (ret < 0) {
2952 return ret;
bellard83f64092006-08-01 16:21:11 +00002953 }
2954
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002955 return qiov->size;
2956}
2957
2958int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002959 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002960{
2961 QEMUIOVector qiov;
2962 struct iovec iov = {
2963 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002964 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002965 };
2966
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002967 if (bytes < 0) {
2968 return -EINVAL;
2969 }
2970
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002971 qemu_iovec_init_external(&qiov, &iov, 1);
2972 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002973}
bellard83f64092006-08-01 16:21:11 +00002974
Kevin Wolff08145f2010-06-16 16:38:15 +02002975/*
2976 * Writes to the file and ensures that no writes are reordered across this
2977 * request (acts as a barrier)
2978 *
2979 * Returns 0 on success, -errno in error cases.
2980 */
2981int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2982 const void *buf, int count)
2983{
2984 int ret;
2985
2986 ret = bdrv_pwrite(bs, offset, buf, count);
2987 if (ret < 0) {
2988 return ret;
2989 }
2990
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002991 /* No flush needed for cache modes that already do it */
2992 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002993 bdrv_flush(bs);
2994 }
2995
2996 return 0;
2997}
2998
/*
 * Copy-on-read worker for an aligned read request: read the whole cluster
 * containing [sector_num, sector_num + nb_sectors) into a bounce buffer,
 * write it back into the image so the data becomes locally allocated, then
 * copy only the requested portion into @qiov.
 *
 * Returns 0 on success, negative errno on failure (-ENOMEM if the bounce
 * buffer cannot be allocated).
 */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
                           &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
    if (bounce_buffer == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    /* Read the full cluster from the underlying driver. */
    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    /* Write the cluster back: use an efficient zero write when the driver
     * supports it and the data is all zeroes, otherwise a plain write. */
    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors, 0);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* Copy just the originally requested range out of the bounce buffer. */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
                        nb_sectors * BDRV_SECTOR_SIZE);

err:
    /* qemu_vfree(NULL) is a no-op, so this is safe on the alloc-failure path */
    qemu_vfree(bounce_buffer);
    return ret;
}
3069
/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read and zeroing after EOF; any other features must be
 * implemented by the caller.
 *
 * @req:    tracked request (already begun by the caller)
 * @offset: byte offset; must be sector-aligned (asserted below)
 * @bytes:  byte count; must be sector-aligned and equal qiov->size
 * @align:  request alignment, used to round the in-bounds part of a request
 *          that straddles EOF
 * @flags:  BDRV_REQ_COPY_ON_READ triggers the copy-on-read path
 */
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    wait_serialising_requests(req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        /* Only fall back to the copy-on-read slow path when the range is
         * not already fully allocated in this image. */
        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    /* Forward the request to the BlockDriver */
    if (!bs->zero_beyond_eof) {
        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
    } else {
        /* Read zeros after EOF */
        int64_t total_sectors, max_nb_sectors;

        total_sectors = bdrv_nb_sectors(bs);
        if (total_sectors < 0) {
            ret = total_sectors;
            goto out;
        }

        /* Number of sectors we may pass to the driver: the in-bounds part
         * of the request, rounded up to the request alignment. */
        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                  align >> BDRV_SECTOR_BITS);
        if (nb_sectors < max_nb_sectors) {
            ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
        } else if (max_nb_sectors > 0) {
            /* Request straddles EOF: read only the in-bounds prefix through
             * a shortened copy of the caller's qiov. */
            QEMUIOVector local_qiov;

            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, 0,
                              max_nb_sectors * BDRV_SECTOR_SIZE);

            ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
                                     &local_qiov);

            qemu_iovec_destroy(&local_qiov);
        } else {
            /* Entirely beyond EOF: nothing to read from the driver. */
            ret = 0;
        }

        /* Reading beyond end of file is supposed to produce zeroes */
        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
            uint64_t offset = MAX(0, total_sectors - sector_num);
            uint64_t bytes = (sector_num + nb_sectors - offset) *
                              BDRV_SECTOR_SIZE;
            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
        }
    }

out:
    return ret;
}
3159
Fam Zhengfc3959e2015-03-24 09:23:49 +08003160static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3161{
3162 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3163 return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3164}
3165
3166static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3167 int64_t offset, size_t bytes)
3168{
3169 int64_t align = bdrv_get_align(bs);
3170 return !(offset & (align - 1) || (bytes & (align - 1)));
3171}
3172
/*
 * Handle a read request in coroutine context
 *
 * Validates the byte range, applies copy-on-read and I/O throttling, pads
 * the request to the device alignment with head/tail bounce buffers when
 * needed, and forwards the (now aligned) request to bdrv_aligned_preadv()
 * inside a tracked request.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    uint64_t align = bdrv_get_align(bs);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, false);
    }

    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        /* Unaligned head: prepend a bounce buffer covering the bytes before
         * @offset within the first aligned chunk. */
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* Unaligned tail: append a bounce buffer for the bytes past the
         * request's end up to the next alignment boundary. */
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    tracked_request_begin(&req, bs, offset, bytes, false);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}
3247
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003248static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3249 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3250 BdrvRequestFlags flags)
3251{
Peter Lieven75af1f32015-02-06 11:54:11 +01003252 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003253 return -EINVAL;
3254 }
3255
3256 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3257 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3258}
3259
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003260int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003261 int nb_sectors, QEMUIOVector *qiov)
3262{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003263 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003264
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003265 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3266}
3267
3268int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3269 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3270{
3271 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3272
3273 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3274 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003275}
3276
Peter Lieven98764152015-02-02 15:48:34 +01003277#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
Peter Lievenc31cb702013-10-24 12:06:58 +02003278
/*
 * Write zeroes to [sector_num, sector_num + nb_sectors).
 *
 * The range is processed in chunks bounded by bs->bl.max_write_zeroes (or
 * BDRV_REQUEST_MAX_SECTORS) and shaped so the bulk of each chunk honours
 * bs->bl.write_zeroes_alignment.  Each chunk first tries the driver's
 * efficient bdrv_co_write_zeroes callback; on -ENOTSUP it falls back to
 * writing an explicitly zeroed bounce buffer via drv->bdrv_co_writev.
 *
 * Returns 0 on success, negative errno on failure (-ENOMEM if the fallback
 * bounce buffer cannot be allocated).
 */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov = {0};
    int ret = 0;

    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
                                        BDRV_REQUEST_MAX_SECTORS);

    while (nb_sectors > 0 && !ret) {
        int num = nb_sectors;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned.
         */
        if (bs->bl.write_zeroes_alignment
            && num > bs->bl.write_zeroes_alignment) {
            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
                /* Make a small request up to the first aligned sector. */
                num = bs->bl.write_zeroes_alignment;
                num -= sector_num % bs->bl.write_zeroes_alignment;
            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
                /* Shorten the request to the last aligned sector.  num cannot
                 * underflow because num > bs->bl.write_zeroes_alignment.
                 */
                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
            }
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_write_zeroes) {
            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
                                            MAX_WRITE_ZEROES_BOUNCE_BUFFER);
            num = MIN(num, max_xfer_len);
            iov.iov_len = num * BDRV_SECTOR_SIZE;
            /* Allocate (and zero) the bounce buffer lazily, first time the
             * fallback is taken; it may be reused by later iterations. */
            if (iov.iov_base == NULL) {
                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
                if (iov.iov_base == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);

            /* Keep bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_xfer_len) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        sector_num += num;
        nb_sectors -= num;
    }

fail:
    /* iov.iov_base is NULL unless the fallback buffer is still cached */
    qemu_vfree(iov.iov_base);
    return ret;
}
3356
/*
 * Forwards an already correctly aligned write request to the BlockDriver.
 *
 * @req:    tracked request (already begun and possibly marked serialising
 *          by the caller)
 * @offset: byte offset; must be sector-aligned (asserted below)
 * @bytes:  byte count; must be sector-aligned and equal qiov->size
 * @flags:  BDRV_REQ_ZERO_WRITE routes to the write-zeroes path; may be
 *          augmented here when zero detection is enabled
 *
 * Runs the before-write notifiers, optionally detects all-zero payloads,
 * issues the driver write (or zero write), flushes when the write cache is
 * disabled, and updates dirty bitmaps, stats and bs->total_sectors.
 */
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* If we had to wait, our request cannot itself have been serialising:
     * a serialising request would have been waited for first. */
    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

    /* Zero detection: turn an all-zero payload into a zero write when the
     * driver supports it and detect-zeroes is enabled. */
    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
        qemu_iovec_is_zero(qiov)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
    } else {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }
    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);

    /* Writethrough semantics: flush after each successful write when the
     * write cache is disabled. */
    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
    }

    bdrv_set_dirty(bs, sector_num, nb_sectors);

    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);

    /* A successful write may have grown the visible device size. */
    if (ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
    }

    return ret;
}
3416
/*
 * Handle a write request in coroutine context
 *
 * Validates the request, applies I/O throttling, and — when @offset or
 * @bytes is not aligned to the device alignment — performs a
 * read-modify-write cycle: the unaligned head and/or tail are read into
 * bounce buffers, combined with the caller's @qiov, and the widened request
 * is forwarded to bdrv_aligned_pwritev().
 *
 * Returns 0 on success, negative errno on failure (-ENOMEDIUM without a
 * driver, -EACCES on a read-only device).
 */
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BdrvTrackedRequest req;
    uint64_t align = bdrv_get_align(bs);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, true);
    }

    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, true);

    if (offset & (align - 1)) {
        /* Unaligned head: read the first aligned chunk and prepend the part
         * preceding @offset to the payload. */
        QEMUIOVector head_qiov;
        struct iovec head_iov;

        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base   = head_buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* Unaligned tail: read the last aligned chunk and append the part
         * following the request's end to the payload. */
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base   = tail_buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                  align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
    }

    if (use_local_qiov) {
        /* Local buffer may have non-zero data. */
        flags &= ~BDRV_REQ_ZERO_WRITE;
    }
    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

fail:
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    /* qemu_vfree(NULL) is a no-op */
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);

    return ret;
}
3543
Kevin Wolf66015532013-12-03 14:40:18 +01003544static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3545 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3546 BdrvRequestFlags flags)
3547{
Peter Lieven75af1f32015-02-06 11:54:11 +01003548 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf66015532013-12-03 14:40:18 +01003549 return -EINVAL;
3550 }
3551
3552 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3553 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3554}
3555
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003556int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3557 int nb_sectors, QEMUIOVector *qiov)
3558{
3559 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3560
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003561 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3562}
3563
3564int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003565 int64_t sector_num, int nb_sectors,
3566 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003567{
Fam Zhengfc3959e2015-03-24 09:23:49 +08003568 int ret;
3569
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003570 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003571
Peter Lievend32f35c2013-10-24 12:06:52 +02003572 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3573 flags &= ~BDRV_REQ_MAY_UNMAP;
3574 }
Fam Zhengfc3959e2015-03-24 09:23:49 +08003575 if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
3576 nb_sectors << BDRV_SECTOR_BITS)) {
3577 ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3578 BDRV_REQ_ZERO_WRITE | flags);
3579 } else {
3580 uint8_t *buf;
3581 QEMUIOVector local_qiov;
3582 size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
Peter Lievend32f35c2013-10-24 12:06:52 +02003583
Fam Zhengfc3959e2015-03-24 09:23:49 +08003584 buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
3585 memset(buf, 0, bytes);
3586 qemu_iovec_init(&local_qiov, 1);
3587 qemu_iovec_add(&local_qiov, buf, bytes);
3588
3589 ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
3590 BDRV_REQ_ZERO_WRITE | flags);
3591 qemu_vfree(buf);
3592 }
3593 return ret;
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003594}
3595
bellard83f64092006-08-01 16:21:11 +00003596/**
bellard83f64092006-08-01 16:21:11 +00003597 * Truncate file to 'offset' bytes (needed only for file protocols)
3598 */
3599int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3600{
3601 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003602 int ret;
bellard83f64092006-08-01 16:21:11 +00003603 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003604 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003605 if (!drv->bdrv_truncate)
3606 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003607 if (bs->read_only)
3608 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003609
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003610 ret = drv->bdrv_truncate(bs, offset);
3611 if (ret == 0) {
3612 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003613 if (bs->blk) {
3614 blk_dev_resize_cb(bs->blk);
3615 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003616 }
3617 return ret;
bellard83f64092006-08-01 16:21:11 +00003618}
3619
3620/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003621 * Length of a allocated file in bytes. Sparse files are counted by actual
3622 * allocated space. Return < 0 if error or unknown.
3623 */
3624int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3625{
3626 BlockDriver *drv = bs->drv;
3627 if (!drv) {
3628 return -ENOMEDIUM;
3629 }
3630 if (drv->bdrv_get_allocated_file_size) {
3631 return drv->bdrv_get_allocated_file_size(bs);
3632 }
3633 if (bs->file) {
3634 return bdrv_get_allocated_file_size(bs->file);
3635 }
3636 return -ENOTSUP;
3637}
3638
3639/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003640 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003641 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003642int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003643{
3644 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003645
bellard83f64092006-08-01 16:21:11 +00003646 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003647 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003648
Kevin Wolfb94a2612013-10-29 12:18:58 +01003649 if (drv->has_variable_length) {
3650 int ret = refresh_total_sectors(bs, bs->total_sectors);
3651 if (ret < 0) {
3652 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003653 }
bellard83f64092006-08-01 16:21:11 +00003654 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003655 return bs->total_sectors;
3656}
3657
3658/**
3659 * Return length in bytes on success, -errno on error.
3660 * The length is always a multiple of BDRV_SECTOR_SIZE.
3661 */
3662int64_t bdrv_getlength(BlockDriverState *bs)
3663{
3664 int64_t ret = bdrv_nb_sectors(bs);
3665
3666 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003667}
3668
bellard19cb3732006-08-19 11:45:59 +00003669/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003670void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003671{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003672 int64_t nb_sectors = bdrv_nb_sectors(bs);
3673
3674 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003675}
bellardcf989512004-02-16 21:56:36 +00003676
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003677void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3678 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003679{
3680 bs->on_read_error = on_read_error;
3681 bs->on_write_error = on_write_error;
3682}
3683
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003684BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003685{
3686 return is_read ? bs->on_read_error : bs->on_write_error;
3687}
3688
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003689BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3690{
3691 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3692
3693 switch (on_err) {
3694 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003695 return (error == ENOSPC) ?
3696 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003697 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003698 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003699 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003700 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003701 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003702 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003703 default:
3704 abort();
3705 }
3706}
3707
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003708static void send_qmp_error_event(BlockDriverState *bs,
3709 BlockErrorAction action,
3710 bool is_read, int error)
3711{
Peter Maydell573742a2014-10-10 20:33:03 +01003712 IoOperationType optype;
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003713
Peter Maydell573742a2014-10-10 20:33:03 +01003714 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3715 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003716 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003717 error == ENOSPC, strerror(error),
3718 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003719}
3720
/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 */
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
                       bool is_read, int error)
{
    /* Callers pass a positive errno value. */
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        bdrv_iostatus_set_err(bs, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects.  First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop.  In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(bs, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        /* Non-stopping policies only report the event. */
        send_qmp_error_event(bs, action, is_read, error);
    }
}
3752
/* Return non-zero if @bs was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
3757
/* Return non-zero if @bs is a SCSI generic (pass-through) device. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
3762
/* Return non-zero if the write cache (writeback mode) is enabled. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
3767
Paolo Bonzini425b0142012-06-06 00:04:52 +02003768void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3769{
3770 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003771
3772 /* so a reopen() will preserve wce */
3773 if (wce) {
3774 bs->open_flags |= BDRV_O_CACHE_WB;
3775 } else {
3776 bs->open_flags &= ~BDRV_O_CACHE_WB;
3777 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003778}
3779
bellardea2384d2004-08-01 21:59:26 +00003780int bdrv_is_encrypted(BlockDriverState *bs)
3781{
3782 if (bs->backing_hd && bs->backing_hd->encrypted)
3783 return 1;
3784 return bs->encrypted;
3785}
3786
aliguoric0f4ce72009-03-05 23:01:01 +00003787int bdrv_key_required(BlockDriverState *bs)
3788{
3789 BlockDriverState *backing_hd = bs->backing_hd;
3790
3791 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3792 return 1;
3793 return (bs->encrypted && !bs->valid_key);
3794}
3795
bellardea2384d2004-08-01 21:59:26 +00003796int bdrv_set_key(BlockDriverState *bs, const char *key)
3797{
3798 int ret;
3799 if (bs->backing_hd && bs->backing_hd->encrypted) {
3800 ret = bdrv_set_key(bs->backing_hd, key);
3801 if (ret < 0)
3802 return ret;
3803 if (!bs->encrypted)
3804 return 0;
3805 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003806 if (!bs->encrypted) {
3807 return -EINVAL;
3808 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3809 return -ENOMEDIUM;
3810 }
aliguoric0f4ce72009-03-05 23:01:01 +00003811 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003812 if (ret < 0) {
3813 bs->valid_key = 0;
3814 } else if (!bs->valid_key) {
3815 bs->valid_key = 1;
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003816 if (bs->blk) {
3817 /* call the change callback now, we skipped it on open */
3818 blk_dev_change_media_cb(bs->blk, true);
3819 }
aliguoribb5fc202009-03-05 23:01:15 +00003820 }
aliguoric0f4ce72009-03-05 23:01:01 +00003821 return ret;
bellardea2384d2004-08-01 21:59:26 +00003822}
3823
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003824/*
3825 * Provide an encryption key for @bs.
3826 * If @key is non-null:
3827 * If @bs is not encrypted, fail.
3828 * Else if the key is invalid, fail.
3829 * Else set @bs's key to @key, replacing the existing key, if any.
3830 * If @key is null:
3831 * If @bs is encrypted and still lacks a key, fail.
3832 * Else do nothing.
3833 * On failure, store an error object through @errp if non-null.
3834 */
3835void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
3836{
3837 if (key) {
3838 if (!bdrv_is_encrypted(bs)) {
Alberto Garcia81e5f782015-04-08 12:29:19 +03003839 error_setg(errp, "Node '%s' is not encrypted",
3840 bdrv_get_device_or_node_name(bs));
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003841 } else if (bdrv_set_key(bs, key) < 0) {
3842 error_set(errp, QERR_INVALID_PASSWORD);
3843 }
3844 } else {
3845 if (bdrv_key_required(bs)) {
Markus Armbrusterb1ca6392015-01-29 10:37:01 +01003846 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3847 "'%s' (%s) is encrypted",
Alberto Garcia81e5f782015-04-08 12:29:19 +03003848 bdrv_get_device_or_node_name(bs),
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003849 bdrv_get_encrypted_filename(bs));
3850 }
3851 }
3852}
3853
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003854const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003855{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003856 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003857}
3858
/*
 * qsort(3) comparator for an array of strings (const char *).
 *
 * qsort() hands the comparator pointers to the array *elements*, i.e.
 * pointers to the string pointers, so they must be dereferenced before
 * calling strcmp().  Passing @a/@b to strcmp() directly would compare
 * the raw bytes of the pointers themselves, not the strings, producing
 * an arbitrary order.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(const char * const *)a, *(const char * const *)b);
}
3863
ths5fafdf22007-09-16 21:08:06 +00003864void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003865 void *opaque)
3866{
3867 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003868 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003869 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003870 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003871
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003872 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003873 if (drv->format_name) {
3874 bool found = false;
3875 int i = count;
3876 while (formats && i && !found) {
3877 found = !strcmp(formats[--i], drv->format_name);
3878 }
3879
3880 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003881 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003882 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003883 }
3884 }
bellardea2384d2004-08-01 21:59:26 +00003885 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003886
3887 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3888
3889 for (i = 0; i < count; i++) {
3890 it(opaque, formats[i]);
3891 }
3892
Jeff Codye855e4f2014-04-28 18:29:54 -04003893 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003894}
3895
Benoît Canetdc364f42014-01-23 21:31:32 +01003896/* This function is to find a node in the bs graph */
3897BlockDriverState *bdrv_find_node(const char *node_name)
3898{
3899 BlockDriverState *bs;
3900
3901 assert(node_name);
3902
3903 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3904 if (!strcmp(node_name, bs->node_name)) {
3905 return bs;
3906 }
3907 }
3908 return NULL;
3909}
3910
Benoît Canetc13163f2014-01-23 21:31:34 +01003911/* Put this QMP function here so it can access the static graph_bdrv_states. */
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003912BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
Benoît Canetc13163f2014-01-23 21:31:34 +01003913{
3914 BlockDeviceInfoList *list, *entry;
3915 BlockDriverState *bs;
3916
3917 list = NULL;
3918 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003919 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
3920 if (!info) {
3921 qapi_free_BlockDeviceInfoList(list);
3922 return NULL;
3923 }
Benoît Canetc13163f2014-01-23 21:31:34 +01003924 entry = g_malloc0(sizeof(*entry));
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003925 entry->value = info;
Benoît Canetc13163f2014-01-23 21:31:34 +01003926 entry->next = list;
3927 list = entry;
3928 }
3929
3930 return list;
3931}
3932
Benoît Canet12d3ba82014-01-23 21:31:35 +01003933BlockDriverState *bdrv_lookup_bs(const char *device,
3934 const char *node_name,
3935 Error **errp)
3936{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003937 BlockBackend *blk;
3938 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003939
Benoît Canet12d3ba82014-01-23 21:31:35 +01003940 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02003941 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003942
Markus Armbruster7f06d472014-10-07 13:59:12 +02003943 if (blk) {
3944 return blk_bs(blk);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003945 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003946 }
3947
Benoît Canetdd67fa52014-02-12 17:15:06 +01003948 if (node_name) {
3949 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003950
Benoît Canetdd67fa52014-02-12 17:15:06 +01003951 if (bs) {
3952 return bs;
3953 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003954 }
3955
Benoît Canetdd67fa52014-02-12 17:15:06 +01003956 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3957 device ? device : "",
3958 node_name ? node_name : "");
3959 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003960}
3961
Jeff Cody5a6684d2014-06-25 15:40:09 -04003962/* If 'base' is in the same chain as 'top', return true. Otherwise,
3963 * return false. If either argument is NULL, return false. */
3964bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3965{
3966 while (top && top != base) {
3967 top = top->backing_hd;
3968 }
3969
3970 return top != NULL;
3971}
3972
Fam Zheng04df7652014-10-31 11:32:54 +08003973BlockDriverState *bdrv_next_node(BlockDriverState *bs)
3974{
3975 if (!bs) {
3976 return QTAILQ_FIRST(&graph_bdrv_states);
3977 }
3978 return QTAILQ_NEXT(bs, node_list);
3979}
3980
Markus Armbruster2f399b02010-06-02 18:55:20 +02003981BlockDriverState *bdrv_next(BlockDriverState *bs)
3982{
3983 if (!bs) {
3984 return QTAILQ_FIRST(&bdrv_states);
3985 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003986 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003987}
3988
/* Return @bs's node name (empty for anonymous nodes). */
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
    return bs->node_name;
}
3993
Markus Armbruster7f06d472014-10-07 13:59:12 +02003994/* TODO check what callers really want: bs->node_name or blk_name() */
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003995const char *bdrv_get_device_name(const BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003996{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003997 return bs->blk ? blk_name(bs->blk) : "";
bellardea2384d2004-08-01 21:59:26 +00003998}
3999
Alberto Garcia9b2aa842015-04-08 12:29:18 +03004000/* This can be used to identify nodes that might not have a device
4001 * name associated. Since node and device names live in the same
4002 * namespace, the result is unambiguous. The exception is if both are
4003 * absent, then this returns an empty (non-null) string. */
4004const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
4005{
4006 return bs->blk ? blk_name(bs->blk) : bs->node_name;
4007}
4008
/* Return the BDRV_O_* flags @bs was opened with. */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}
4013
/* Flush every open BlockDriverState.  Returns 0 on success or the
 * first error encountered; later devices are still flushed. */
int bdrv_flush_all(void)
{
    BlockDriverState *bs;
    int result = 0;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        /* Serialize against the BDS's own AioContext while flushing. */
        aio_context_acquire(aio_context);
        ret = bdrv_flush(bs);
        /* Remember only the first failure, but keep flushing the rest. */
        if (ret < 0 && !result) {
            result = ret;
        }
        aio_context_release(aio_context);
    }

    return result;
}
4033
/* Helper usable as a driver's bdrv_has_zero_init callback: always
 * reports that freshly created images read back as zeroes. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
4038
/* Return non-zero if a newly created image of this kind reads back as
 * all zeroes (as reported by the driver). */
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    /* If BS is a copy on write image, it is initialized to
       the contents of the base image, which may not be zeroes.  */
    if (bs->backing_hd) {
        return 0;
    }
    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    /* safe default */
    return 0;
}
4055
Peter Lieven4ce78692013-10-24 12:06:54 +02004056bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
4057{
4058 BlockDriverInfo bdi;
4059
4060 if (bs->backing_hd) {
4061 return false;
4062 }
4063
4064 if (bdrv_get_info(bs, &bdi) == 0) {
4065 return bdi.unallocated_blocks_are_zero;
4066 }
4067
4068 return false;
4069}
4070
4071bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
4072{
4073 BlockDriverInfo bdi;
4074
4075 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
4076 return false;
4077 }
4078
4079 if (bdrv_get_info(bs, &bdi) == 0) {
4080 return bdi.can_write_zeroes_with_unmap;
4081 }
4082
4083 return false;
4084}
4085
/* Argument/result bundle passed to the bdrv_get_block_status() coroutine. */
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;    /* node to query */
    BlockDriverState *base;  /* NOTE(review): not set by the visible callers */
    int64_t sector_num;      /* first sector to query */
    int nb_sectors;          /* maximum number of sectors to consider */
    int *pnum;               /* out: sectors known to share the same state */
    int64_t ret;             /* out: BDRV_BLOCK_* bits or -errno */
    bool done;               /* set last, after ret/pnum are valid */
} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004095
/*
 * Returns the allocation status of the specified sectors.
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
                                                     int64_t sector_num,
                                                     int nb_sectors, int *pnum)
{
    int64_t total_sectors;
    int64_t n;
    int64_t ret, ret2;

    total_sectors = bdrv_nb_sectors(bs);
    if (total_sectors < 0) {
        return total_sectors;
    }

    if (sector_num >= total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* Clamp the request to the end of the image. */
    n = total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    if (!bs->drv->bdrv_co_get_block_status) {
        /* Driver without the hook: report everything as allocated data;
         * protocol drivers can additionally report a valid offset. */
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
        }
        return ret;
    }

    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret < 0) {
        *pnum = 0;
        return ret;
    }

    if (ret & BDRV_BLOCK_RAW) {
        /* RAW means "same data lives in bs->file at the given offset":
         * re-query the protocol layer instead. */
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                     *pnum, pnum);
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    }

    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing_hd) {
            /* Reads past the end of the backing file yield zeroes. */
            BlockDriverState *bs2 = bs->backing_hd;
            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    if (bs->file &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int file_pnum;

        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                        *pnum, &file_pnum);
        if (ret2 >= 0) {
            /* Ignore errors. This is just providing extra information, it
             * is useful but not necessary.
             */
            if (!file_pnum) {
                /* !file_pnum indicates an offset at or beyond the EOF; it is
                 * perfectly valid for the format block driver to point to such
                 * offsets, so catch it and mark everything as zero */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

    return ret;
}
4197
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004198/* Coroutine wrapper for bdrv_get_block_status() */
4199static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004200{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004201 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004202 BlockDriverState *bs = data->bs;
4203
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004204 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4205 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004206 data->done = true;
4207}
4208
/*
 * Synchronous wrapper around bdrv_co_get_block_status().
 *
 * See bdrv_co_get_block_status() for details.
 */
int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors, int *pnum)
{
    Coroutine *co;
    BdrvCoGetBlockStatusData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_get_block_status_co_entry(&data);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
        qemu_coroutine_enter(co, &data);
        while (!data.done) {
            /* Drive the BDS's event loop until the coroutine finishes. */
            aio_poll(aio_context, true);
        }
    }
    return data.ret;
}
4240
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004241int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4242 int nb_sectors, int *pnum)
4243{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004244 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4245 if (ret < 0) {
4246 return ret;
4247 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004248 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004249}
4250
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (inclusive).  BASE can be NULL to check if the given
 * sector is allocated in any image of the chain.  Return false otherwise.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 *  the specified sector) that are known to be in the same
 *  allocated/unallocated state.
 *
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t sector_num,
                            int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    int ret, n = nb_sectors;

    /* Walk the backing chain from 'top' down towards 'base' (exclusive). */
    intermediate = top;
    while (intermediate && intermediate != base) {
        int pnum_inter;
        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
                                &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            /* Allocated in this layer: report its extent. */
            *pnum = pnum_inter;
            return 1;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nr_sectors] allocated.
         */
        if (n > pnum_inter &&
            (intermediate == top ||
             sector_num + pnum_inter < intermediate->total_sectors)) {
            /* Shrink the reported extent so it stays valid for every
             * layer inspected so far. */
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    *pnum = n;
    return 0;
}
4301
aliguori045df332009-03-05 23:00:48 +00004302const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4303{
4304 if (bs->backing_hd && bs->backing_hd->encrypted)
4305 return bs->backing_file;
4306 else if (bs->encrypted)
4307 return bs->filename;
4308 else
4309 return NULL;
4310}
4311
/* Copy @bs's backing file name into @filename (truncated to fit). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
4317
/* Write @nb_sectors from @buf at @sector_num using the driver's
 * compressed-write hook.  Returns 0 or a negative errno. */
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_write_compressed) {
        return -ENOTSUP;
    }
    ret = bdrv_check_request(bs, sector_num, nb_sectors);
    if (ret < 0) {
        return ret;
    }

    /* NOTE(review): the assert enforces that no dirty bitmaps are
     * attached — compressed writes evidently don't update them. */
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
ths3b46e622007-09-17 08:09:54 +00004339
bellardfaea38e2006-08-05 21:31:00 +00004340int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4341{
4342 BlockDriver *drv = bs->drv;
4343 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004344 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004345 if (!drv->bdrv_get_info)
4346 return -ENOTSUP;
4347 memset(bdi, 0, sizeof(*bdi));
4348 return drv->bdrv_get_info(bs, bdi);
4349}
4350
Max Reitzeae041f2013-10-09 10:46:16 +02004351ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4352{
4353 BlockDriver *drv = bs->drv;
4354 if (drv && drv->bdrv_get_specific_info) {
4355 return drv->bdrv_get_specific_info(bs);
4356 }
4357 return NULL;
4358}
4359
Christoph Hellwig45566e92009-07-10 23:11:57 +02004360int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4361 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004362{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004363 QEMUIOVector qiov;
4364 struct iovec iov = {
4365 .iov_base = (void *) buf,
4366 .iov_len = size,
4367 };
4368
4369 qemu_iovec_init_external(&qiov, &iov, 1);
4370 return bdrv_writev_vmstate(bs, &qiov, pos);
4371}
4372
4373int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4374{
aliguori178e08a2009-04-05 19:10:55 +00004375 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004376
4377 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004378 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004379 } else if (drv->bdrv_save_vmstate) {
4380 return drv->bdrv_save_vmstate(bs, qiov, pos);
4381 } else if (bs->file) {
4382 return bdrv_writev_vmstate(bs->file, qiov, pos);
4383 }
4384
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004385 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004386}
4387
Christoph Hellwig45566e92009-07-10 23:11:57 +02004388int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4389 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004390{
4391 BlockDriver *drv = bs->drv;
4392 if (!drv)
4393 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004394 if (drv->bdrv_load_vmstate)
4395 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4396 if (bs->file)
4397 return bdrv_load_vmstate(bs->file, buf, pos, size);
4398 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004399}
4400
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004401void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4402{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004403 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004404 return;
4405 }
4406
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004407 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004408}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004409
Kevin Wolf41c695c2012-12-06 14:32:58 +01004410int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4411 const char *tag)
4412{
4413 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4414 bs = bs->file;
4415 }
4416
4417 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4418 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4419 }
4420
4421 return -ENOTSUP;
4422}
4423
Fam Zheng4cc70e92013-11-20 10:01:54 +08004424int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4425{
4426 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4427 bs = bs->file;
4428 }
4429
4430 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4431 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4432 }
4433
4434 return -ENOTSUP;
4435}
4436
Kevin Wolf41c695c2012-12-06 14:32:58 +01004437int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4438{
Max Reitz938789e2014-03-10 23:44:08 +01004439 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004440 bs = bs->file;
4441 }
4442
4443 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4444 return bs->drv->bdrv_debug_resume(bs, tag);
4445 }
4446
4447 return -ENOTSUP;
4448}
4449
4450bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4451{
4452 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4453 bs = bs->file;
4454 }
4455
4456 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4457 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4458 }
4459
4460 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004461}
4462
Blue Swirl199630b2010-07-25 20:49:34 +00004463int bdrv_is_snapshot(BlockDriverState *bs)
4464{
4465 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4466}
4467
/* backing_file can either be relative, or absolute, or a protocol. If it is
 * relative, it must be relative to the chain. So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain. */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    /* Scratch buffers for canonicalized paths; freed before returning. */
    filename_full = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    /* Walk the backing chain starting from @bs, looking for the node whose
     * backing file matches @backing_file. */
    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            /* NOTE: realpath() fails for paths that do not exist on disk;
             * such candidates are simply skipped. */
            if (!realpath(filename_tmp, filename_full)) {
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    /* NULL when no node in the chain matched */
    return retval;
}
4533
Benoît Canetf198fd12012-08-02 10:22:47 +02004534int bdrv_get_backing_file_depth(BlockDriverState *bs)
4535{
4536 if (!bs->drv) {
4537 return 0;
4538 }
4539
4540 if (!bs->backing_hd) {
4541 return 0;
4542 }
4543
4544 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4545}
4546
bellard83f64092006-08-01 16:21:11 +00004547/**************************************************************/
4548/* async I/Os */
4549
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004550BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4551 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004552 BlockCompletionFunc *cb, void *opaque)
aliguori3b69e4b2009-01-22 16:59:24 +00004553{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004554 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4555
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004556 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004557 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004558}
4559
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004560BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4561 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004562 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004563{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004564 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4565
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004566 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004567 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004568}
4569
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004570BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004571 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004572 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004573{
4574 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4575
4576 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4577 BDRV_REQ_ZERO_WRITE | flags,
4578 cb, opaque, true);
4579}
4580
Kevin Wolf40b4f532009-09-09 17:53:37 +02004581
4582typedef struct MultiwriteCB {
4583 int error;
4584 int num_requests;
4585 int num_callbacks;
4586 struct {
Markus Armbruster097310b2014-10-07 13:59:15 +02004587 BlockCompletionFunc *cb;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004588 void *opaque;
4589 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004590 } callbacks[];
4591} MultiwriteCB;
4592
4593static void multiwrite_user_cb(MultiwriteCB *mcb)
4594{
4595 int i;
4596
4597 for (i = 0; i < mcb->num_callbacks; i++) {
4598 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004599 if (mcb->callbacks[i].free_qiov) {
4600 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4601 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004602 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004603 }
4604}
4605
4606static void multiwrite_cb(void *opaque, int ret)
4607{
4608 MultiwriteCB *mcb = opaque;
4609
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004610 trace_multiwrite_cb(mcb, ret);
4611
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004612 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004613 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004614 }
4615
4616 mcb->num_requests--;
4617 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004618 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004619 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004620 }
4621}
4622
4623static int multiwrite_req_compare(const void *a, const void *b)
4624{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004625 const BlockRequest *req1 = a, *req2 = b;
4626
4627 /*
4628 * Note that we can't simply subtract req2->sector from req1->sector
4629 * here as that could overflow the return value.
4630 */
4631 if (req1->sector > req2->sector) {
4632 return 1;
4633 } else if (req1->sector < req2->sector) {
4634 return -1;
4635 } else {
4636 return 0;
4637 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004638}
4639
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                            int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // Don't merge if the combined request would exceed IOV_MAX iovecs
        // (+1 accounts for a possible tail segment added below).
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        // Respect the driver's advertised maximum transfer length, if any.
        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);

            // We shouldn't need to add any zeros between the two requests:
            // the merge condition above guarantees they touch or overlap.
            assert (reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);

            // Add tail of first request, if necessary
            if (qiov->size < reqs[outidx].qiov->size) {
                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
                                  reqs[outidx].qiov->size - qiov->size);
            }

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Remember the merged qiov so multiwrite_user_cb() frees it.
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    // Account the number of requests eliminated by merging.
    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);

    return outidx + 1;
}
4712
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    // (flexible-array allocation: one callbacks[] slot per original request)
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    /* Set num_requests before submitting so an early completion cannot see
     * the counter hit zero prematurely. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
                              reqs[i].nb_sectors, reqs[i].flags,
                              multiwrite_cb, mcb,
                              true);
    }

    return 0;
}
4770
/* Synchronously cancel an in-flight request: trigger an asynchronous cancel
 * and then poll the request's AioContext until our extra reference is the
 * only one left, i.e. the completion callback has run. */
void bdrv_aio_cancel(BlockAIOCB *acb)
{
    /* Hold a reference so the ACB stays valid while we poll. */
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            /* No way to find the context to poll — cannot make progress. */
            abort();
        }
    }
    qemu_aio_unref(acb);
}
4786
4787/* Async version of aio cancel. The caller is not blocked if the acb implements
4788 * cancel_async, otherwise we do nothing and let the request normally complete.
4789 * In either case the completion callback must be called. */
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004790void bdrv_aio_cancel_async(BlockAIOCB *acb)
Fam Zheng02c50ef2014-09-11 13:41:09 +08004791{
4792 if (acb->aiocb_info->cancel_async) {
4793 acb->aiocb_info->cancel_async(acb);
4794 }
bellard83f64092006-08-01 16:21:11 +00004795}
4796
4797/**************************************************************/
4798/* async block device emulation */
4799
/* ACB for the synchronous (bounce-buffer) emulation of AIO on top of
 * drivers that only provide bdrv_read/bdrv_write. */
typedef struct BlockAIOCBSync {
    BlockAIOCB common;
    QEMUBH *bh;          /* bottom half that delivers the completion */
    int ret;             /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov;  /* caller's scatter/gather list */
    uint8_t *bounce;     /* flat bounce buffer (NULL if allocation failed) */
    int is_write;
} BlockAIOCBSync;

/* ACB allocation descriptor for the synchronous emulation path. */
static const AIOCBInfo bdrv_em_aiocb_info = {
    .aiocb_size = sizeof(BlockAIOCBSync),
};
4813
/* Bottom half of the synchronous AIO emulation: copy read data back to the
 * caller, free the bounce buffer, deliver the completion callback, then tear
 * down the BH and drop the ACB reference. */
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockAIOCBSync *acb = opaque;

    /* Only copy back on a successful read; ret < 0 also covers the -ENOMEM
     * case where the bounce buffer was never allocated. */
    if (!acb->is_write && acb->ret >= 0) {
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_unref(acb);
}
bellardbeac80c2006-06-26 20:08:57 +00004827
/* Emulate asynchronous vectored I/O with the driver's synchronous
 * bdrv_read/bdrv_write hooks and a bounce buffer. The actual I/O happens
 * here, synchronously; completion is merely deferred to a bottom half so
 * the caller observes normal AIO semantics. */
static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                      int64_t sector_num,
                                      QEMUIOVector *qiov,
                                      int nb_sectors,
                                      BlockCompletionFunc *cb,
                                      void *opaque,
                                      int is_write)

{
    BlockAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* qemu_try_blockalign() may fail; handled below via -ENOMEM. */
    acb->bounce = qemu_try_blockalign(bs, qiov->size);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);

    if (acb->bounce == NULL) {
        acb->ret = -ENOMEM;
    } else if (is_write) {
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    /* Deliver the (already known) result through the BH. */
    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
4858
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004859static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004860 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004861 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004862{
aliguorif141eaf2009-04-07 18:43:24 +00004863 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004864}
4865
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004866static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004867 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004868 BlockCompletionFunc *cb, void *opaque)
aliguorif141eaf2009-04-07 18:43:24 +00004869{
4870 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4871}
4872
Kevin Wolf68485422011-06-30 10:05:46 +02004873
/* ACB for AIO requests serviced by a coroutine. */
typedef struct BlockAIOCBCoroutine {
    BlockAIOCB common;
    BlockRequest req;    /* request parameters; req.error doubles as state:
                          * -EINPROGRESS until the coroutine finishes */
    bool is_write;
    bool need_bh;        /* true until bdrv_co_maybe_schedule_bh() runs;
                          * while set, completion must go through a BH */
    bool *done;          /* NOTE(review): not referenced in this chunk —
                          * presumably used elsewhere in the file; verify */
    QEMUBH* bh;
} BlockAIOCBCoroutine;

/* ACB allocation descriptor for the coroutine path. */
static const AIOCBInfo bdrv_em_co_aiocb_info = {
    .aiocb_size = sizeof(BlockAIOCBCoroutine),
};
4886
/* Deliver the completion callback and drop the ACB, but only once need_bh
 * has been cleared. If need_bh is still set the request finished before
 * the submitter returned; bdrv_co_maybe_schedule_bh() will then defer the
 * completion to a bottom half instead. */
static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
{
    if (!acb->need_bh) {
        acb->common.cb(acb->common.opaque, acb->req.error);
        qemu_aio_unref(acb);
    }
}
4894
/* Bottom half used when the request completed before submission returned:
 * need_bh has already been cleared, so bdrv_co_complete() will now really
 * deliver the callback. */
static void bdrv_co_em_bh(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;

    assert(!acb->need_bh);
    qemu_bh_delete(acb->bh);
    bdrv_co_complete(acb);
}
4903
/* Called by the submitter after entering the coroutine. Clears need_bh so
 * future completions may call back directly; if the coroutine already
 * finished (req.error is no longer the -EINPROGRESS sentinel), schedule a
 * BH to deliver the completion outside the submitter's call frame. */
static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
{
    acb->need_bh = false;
    if (acb->req.error != -EINPROGRESS) {
        BlockDriverState *bs = acb->common.bs;

        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
        qemu_bh_schedule(acb->bh);
    }
}
4914
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004915/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4916static void coroutine_fn bdrv_co_do_rw(void *opaque)
4917{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004918 BlockAIOCBCoroutine *acb = opaque;
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004919 BlockDriverState *bs = acb->common.bs;
4920
4921 if (!acb->is_write) {
4922 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004923 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004924 } else {
4925 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004926 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004927 }
4928
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004929 bdrv_co_complete(acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004930}
4931
/* Common AIO submission path: package the request into a coroutine ACB,
 * run bdrv_co_do_rw in a coroutine, and arrange completion delivery.
 * req.error is preset to -EINPROGRESS as a "not finished yet" sentinel that
 * bdrv_co_maybe_schedule_bh() checks after the coroutine is entered. */
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BdrvRequestFlags flags,
                                         BlockCompletionFunc *cb,
                                         void *opaque,
                                         bool is_write)
{
    Coroutine *co;
    BlockAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->need_bh = true;
    acb->req.error = -EINPROGRESS;
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->req.flags = flags;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    /* May run the whole request to completion before returning. */
    qemu_coroutine_enter(co, acb);

    bdrv_co_maybe_schedule_bh(acb);
    return &acb->common;
}
4959
Paolo Bonzini07f07612011-10-17 12:32:12 +02004960static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004961{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004962 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004963 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004964
Paolo Bonzini07f07612011-10-17 12:32:12 +02004965 acb->req.error = bdrv_co_flush(bs);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004966 bdrv_co_complete(acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004967}
4968
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004969BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004970 BlockCompletionFunc *cb, void *opaque)
Alexander Graf016f5cf2010-05-26 17:51:49 +02004971{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004972 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004973
Paolo Bonzini07f07612011-10-17 12:32:12 +02004974 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004975 BlockAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004976
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004977 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004978 acb->need_bh = true;
4979 acb->req.error = -EINPROGRESS;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004980
Paolo Bonzini07f07612011-10-17 12:32:12 +02004981 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4982 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004983
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004984 bdrv_co_maybe_schedule_bh(acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004985 return &acb->common;
4986}
4987
Paolo Bonzini4265d622011-10-17 12:32:14 +02004988static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4989{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004990 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004991 BlockDriverState *bs = acb->common.bs;
4992
4993 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004994 bdrv_co_complete(acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004995}
4996
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004997BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
Paolo Bonzini4265d622011-10-17 12:32:14 +02004998 int64_t sector_num, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004999 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzini4265d622011-10-17 12:32:14 +02005000{
5001 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005002 BlockAIOCBCoroutine *acb;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005003
5004 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
5005
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01005006 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01005007 acb->need_bh = true;
5008 acb->req.error = -EINPROGRESS;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005009 acb->req.sector = sector_num;
5010 acb->req.nb_sectors = nb_sectors;
5011 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
5012 qemu_coroutine_enter(co, acb);
5013
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01005014 bdrv_co_maybe_schedule_bh(acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005015 return &acb->common;
5016}
5017
/* Register all block drivers built into this binary (runs the
 * MODULE_INIT_BLOCK constructors). */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
pbrookce1a14d2006-08-07 02:38:06 +00005022
/* Like bdrv_init(), but restrict usable formats to the configured
 * whitelist by setting the global use_bdrv_whitelist flag first. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
5028
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01005029void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02005030 BlockCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00005031{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005032 BlockAIOCB *acb;
pbrookce1a14d2006-08-07 02:38:06 +00005033
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01005034 acb = g_slice_alloc(aiocb_info->aiocb_size);
5035 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00005036 acb->bs = bs;
5037 acb->cb = cb;
5038 acb->opaque = opaque;
Fam Zhengf197fe22014-09-11 13:41:08 +08005039 acb->refcnt = 1;
pbrookce1a14d2006-08-07 02:38:06 +00005040 return acb;
5041}
5042
Fam Zhengf197fe22014-09-11 13:41:08 +08005043void qemu_aio_ref(void *p)
5044{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005045 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08005046 acb->refcnt++;
5047}
5048
Fam Zheng80074292014-09-11 13:41:28 +08005049void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00005050{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005051 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08005052 assert(acb->refcnt > 0);
5053 if (--acb->refcnt == 0) {
5054 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
5055 }
pbrookce1a14d2006-08-07 02:38:06 +00005056}
bellard19cb3732006-08-19 11:45:59 +00005057
5058/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005059/* Coroutine block device emulation */
5060
5061typedef struct CoroutineIOCompletion {
5062 Coroutine *coroutine;
5063 int ret;
5064} CoroutineIOCompletion;
5065
5066static void bdrv_co_io_em_complete(void *opaque, int ret)
5067{
5068 CoroutineIOCompletion *co = opaque;
5069
5070 co->ret = ret;
5071 qemu_coroutine_enter(co->coroutine, NULL);
5072}
5073
/* Emulate coroutine-style I/O on top of a driver's AIO interface: submit
 * via bdrv_aio_readv/writev with bdrv_co_io_em_complete() as callback,
 * yield until the callback re-enters us, then return the result. */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    /* A NULL ACB means the driver refused the request outright. */
    if (!acb) {
        return -EIO;
    }
    /* Sleep until bdrv_co_io_em_complete() re-enters this coroutine. */
    qemu_coroutine_yield();

    return co.ret;
}
5099
5100static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
5101 int64_t sector_num, int nb_sectors,
5102 QEMUIOVector *iov)
5103{
5104 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
5105}
5106
5107static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
5108 int64_t sector_num, int nb_sectors,
5109 QEMUIOVector *iov)
5110{
5111 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
5112}
5113
Paolo Bonzini07f07612011-10-17 12:32:12 +02005114static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005115{
Paolo Bonzini07f07612011-10-17 12:32:12 +02005116 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005117
Paolo Bonzini07f07612011-10-17 12:32:12 +02005118 rwco->ret = bdrv_co_flush(rwco->bs);
5119}
5120
/* Flush bs and then, recursively, its underlying protocol (bs->file).
 * Cached data is always written back to the OS; it is only forced to the
 * disk itself when BDRV_O_NO_FLUSH (cache=unsafe) is not set.
 * Returns 0 on success or if there is nothing to flush, negative errno
 * on failure. */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* No medium / read-only: nothing can be dirty, report success. */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Emulate the coroutine flush via the driver's AIO flush: submit
         * and yield until bdrv_co_io_em_complete() wakes us. */
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bdrv_co_flush(bs->file);
}
5183
/* Drop any stale metadata/data caches for @bs and re-read them.
 * Only acts on images opened with BDRV_O_INCOMING; clears that flag so
 * the invalidation happens at most once.  Errors are reported via @errp. */
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    Error *local_err = NULL;
    int ret;

    if (!bs->drv) {
        return;
    }

    if (!(bs->open_flags & BDRV_O_INCOMING)) {
        return;
    }
    bs->open_flags &= ~BDRV_O_INCOMING;

    if (bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs, &local_err);
    } else if (bs->file) {
        /* Formats without their own hook delegate to the protocol layer. */
        bdrv_invalidate_cache(bs->file, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /* The image size may have changed behind our back; pick it up. */
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        return;
    }
}
5214
/* Invalidate the caches of every known BlockDriverState, stopping at the
 * first failure (reported via @errp). */
void bdrv_invalidate_cache_all(Error **errp)
{
    BlockDriverState *bs;
    Error *local_err = NULL;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        /* Each BDS must only be touched while its AioContext is held. */
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_invalidate_cache(bs, &local_err);
        aio_context_release(aio_context);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}
5232
Paolo Bonzini07f07612011-10-17 12:32:12 +02005233int bdrv_flush(BlockDriverState *bs)
5234{
5235 Coroutine *co;
5236 RwCo rwco = {
5237 .bs = bs,
5238 .ret = NOT_DONE,
5239 };
5240
5241 if (qemu_in_coroutine()) {
5242 /* Fast-path if already in coroutine context */
5243 bdrv_flush_co_entry(&rwco);
5244 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005245 AioContext *aio_context = bdrv_get_aio_context(bs);
5246
Paolo Bonzini07f07612011-10-17 12:32:12 +02005247 co = qemu_coroutine_create(bdrv_flush_co_entry);
5248 qemu_coroutine_enter(co, &rwco);
5249 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005250 aio_poll(aio_context, true);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005251 }
5252 }
5253
5254 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005255}
5256
/* Bounces the arguments of a synchronous bdrv_discard() call into the
 * coroutine entry point bdrv_discard_co_entry(). */
typedef struct DiscardCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int ret;                /* NOT_DONE until the coroutine finishes */
} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005263static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5264{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005265 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005266
5267 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5268}
5269
5270int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5271 int nb_sectors)
5272{
Max Reitzb9c64942015-02-05 13:58:25 -05005273 int max_discard, ret;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005274
Paolo Bonzini4265d622011-10-17 12:32:14 +02005275 if (!bs->drv) {
5276 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05005277 }
5278
5279 ret = bdrv_check_request(bs, sector_num, nb_sectors);
5280 if (ret < 0) {
5281 return ret;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005282 } else if (bs->read_only) {
5283 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005284 }
5285
Fam Zhenge4654d22013-11-13 18:29:43 +08005286 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005287
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005288 /* Do nothing if disabled. */
5289 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5290 return 0;
5291 }
5292
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005293 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005294 return 0;
5295 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005296
Peter Lieven75af1f32015-02-06 11:54:11 +01005297 max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005298 while (nb_sectors > 0) {
5299 int ret;
5300 int num = nb_sectors;
5301
5302 /* align request */
5303 if (bs->bl.discard_alignment &&
5304 num >= bs->bl.discard_alignment &&
5305 sector_num % bs->bl.discard_alignment) {
5306 if (num > bs->bl.discard_alignment) {
5307 num = bs->bl.discard_alignment;
5308 }
5309 num -= sector_num % bs->bl.discard_alignment;
5310 }
5311
5312 /* limit request size */
5313 if (num > max_discard) {
5314 num = max_discard;
5315 }
5316
5317 if (bs->drv->bdrv_co_discard) {
5318 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5319 } else {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005320 BlockAIOCB *acb;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005321 CoroutineIOCompletion co = {
5322 .coroutine = qemu_coroutine_self(),
5323 };
5324
5325 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5326 bdrv_co_io_em_complete, &co);
5327 if (acb == NULL) {
5328 return -EIO;
5329 } else {
5330 qemu_coroutine_yield();
5331 ret = co.ret;
5332 }
5333 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005334 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005335 return ret;
5336 }
5337
5338 sector_num += num;
5339 nb_sectors -= num;
5340 }
5341 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005342}
5343
5344int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5345{
5346 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005347 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005348 .bs = bs,
5349 .sector_num = sector_num,
5350 .nb_sectors = nb_sectors,
5351 .ret = NOT_DONE,
5352 };
5353
5354 if (qemu_in_coroutine()) {
5355 /* Fast-path if already in coroutine context */
5356 bdrv_discard_co_entry(&rwco);
5357 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005358 AioContext *aio_context = bdrv_get_aio_context(bs);
5359
Paolo Bonzini4265d622011-10-17 12:32:14 +02005360 co = qemu_coroutine_create(bdrv_discard_co_entry);
5361 qemu_coroutine_enter(co, &rwco);
5362 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005363 aio_poll(aio_context, true);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005364 }
5365 }
5366
5367 return rwco.ret;
5368}
5369
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005370/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005371/* removable device support */
5372
5373/**
5374 * Return TRUE if the media is present
5375 */
5376int bdrv_is_inserted(BlockDriverState *bs)
5377{
5378 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005379
bellard19cb3732006-08-19 11:45:59 +00005380 if (!drv)
5381 return 0;
5382 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005383 return 1;
5384 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005385}
5386
5387/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005388 * Return whether the media changed since the last call to this
5389 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005390 */
5391int bdrv_media_changed(BlockDriverState *bs)
5392{
5393 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005394
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005395 if (drv && drv->bdrv_media_changed) {
5396 return drv->bdrv_media_changed(bs);
5397 }
5398 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005399}
5400
5401/**
5402 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5403 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005404void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005405{
5406 BlockDriver *drv = bs->drv;
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005407 const char *device_name;
bellard19cb3732006-08-19 11:45:59 +00005408
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005409 if (drv && drv->bdrv_eject) {
5410 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005411 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005412
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005413 device_name = bdrv_get_device_name(bs);
5414 if (device_name[0] != '\0') {
5415 qapi_event_send_device_tray_moved(device_name,
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02005416 eject_flag, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005417 }
bellard19cb3732006-08-19 11:45:59 +00005418}
5419
bellard19cb3732006-08-19 11:45:59 +00005420/**
5421 * Lock or unlock the media (if it is locked, the user won't be able
5422 * to eject it manually).
5423 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005424void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005425{
5426 BlockDriver *drv = bs->drv;
5427
Markus Armbruster025e8492011-09-06 18:58:47 +02005428 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005429
Markus Armbruster025e8492011-09-06 18:58:47 +02005430 if (drv && drv->bdrv_lock_medium) {
5431 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005432 }
5433}
ths985a03b2007-12-24 16:10:43 +00005434
5435/* needed for generic scsi interface */
5436
5437int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5438{
5439 BlockDriver *drv = bs->drv;
5440
5441 if (drv && drv->bdrv_ioctl)
5442 return drv->bdrv_ioctl(bs, req, buf);
5443 return -ENOTSUP;
5444}
aliguori7d780662009-03-12 19:57:08 +00005445
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005446BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
aliguori221f7152009-03-28 17:28:41 +00005447 unsigned long int req, void *buf,
Markus Armbruster097310b2014-10-07 13:59:15 +02005448 BlockCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005449{
aliguori221f7152009-03-28 17:28:41 +00005450 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005451
aliguori221f7152009-03-28 17:28:41 +00005452 if (drv && drv->bdrv_aio_ioctl)
5453 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5454 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005455}
aliguorie268ca52009-04-22 20:20:00 +00005456
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005457void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005458{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005459 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005460}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005461
aliguorie268ca52009-04-22 20:20:00 +00005462void *qemu_blockalign(BlockDriverState *bs, size_t size)
5463{
Kevin Wolf339064d2013-11-28 10:23:32 +01005464 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005465}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005466
Max Reitz9ebd8442014-10-22 14:09:27 +02005467void *qemu_blockalign0(BlockDriverState *bs, size_t size)
5468{
5469 return memset(qemu_blockalign(bs, size), 0, size);
5470}
5471
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005472void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5473{
5474 size_t align = bdrv_opt_mem_align(bs);
5475
5476 /* Ensure that NULL is never returned on success */
5477 assert(align > 0);
5478 if (size == 0) {
5479 size = align;
5480 }
5481
5482 return qemu_try_memalign(align, size);
5483}
5484
Max Reitz9ebd8442014-10-22 14:09:27 +02005485void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
5486{
5487 void *mem = qemu_try_blockalign(bs, size);
5488
5489 if (mem) {
5490 memset(mem, 0, size);
5491 }
5492
5493 return mem;
5494}
5495
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005496/*
5497 * Check if all memory in this vector is sector aligned.
5498 */
5499bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5500{
5501 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005502 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005503
5504 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005505 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005506 return false;
5507 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005508 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005509 return false;
5510 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005511 }
5512
5513 return true;
5514}
5515
Fam Zheng0db6e542015-04-17 19:49:50 -04005516BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
5517{
5518 BdrvDirtyBitmap *bm;
5519
5520 assert(name);
5521 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5522 if (bm->name && !strcmp(name, bm->name)) {
5523 return bm;
5524 }
5525 }
5526 return NULL;
5527}
5528
/* Strip @bitmap of its name, making it anonymous.
 * Must not be called on a frozen bitmap (one with a successor installed). */
void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    g_free(bitmap->name);
    bitmap->name = NULL;
}
5535
/* Create a dirty bitmap on @bs and insert it into the BDS's bitmap list.
 *
 * @granularity: bytes per bitmap bit; must be a power of two and at least
 *               one sector (BDRV_SECTOR_SIZE).
 * @name: bitmap name, or NULL for an anonymous bitmap; duplicate names
 *        are rejected.
 *
 * Returns the new bitmap, or NULL with @errp set on failure. */
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
                                          uint32_t granularity,
                                          const char *name,
                                          Error **errp)
{
    int64_t bitmap_size;
    BdrvDirtyBitmap *bitmap;
    uint32_t sector_granularity;

    assert((granularity & (granularity - 1)) == 0);

    if (name && bdrv_find_dirty_bitmap(bs, name)) {
        error_setg(errp, "Bitmap already exists: %s", name);
        return NULL;
    }
    sector_granularity = granularity >> BDRV_SECTOR_BITS;
    assert(sector_granularity);
    bitmap_size = bdrv_nb_sectors(bs);
    if (bitmap_size < 0) {
        error_setg_errno(errp, -bitmap_size, "could not get length of device");
        /* NOTE(review): errno is also set here, apparently for callers that
         * inspect it directly -- confirm before removing. */
        errno = -bitmap_size;
        return NULL;
    }
    bitmap = g_new0(BdrvDirtyBitmap, 1);
    /* hbitmap granularity is expressed as log2 of sectors per bit. */
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
    bitmap->size = bitmap_size;
    bitmap->name = g_strdup(name);
    bitmap->disabled = false;
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
    return bitmap;
}
5567
John Snow9bd2b082015-04-17 19:49:57 -04005568bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
5569{
5570 return bitmap->successor;
5571}
5572
John Snowb8e6fb72015-04-17 19:49:56 -04005573bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
5574{
John Snow9bd2b082015-04-17 19:49:57 -04005575 return !(bitmap->disabled || bitmap->successor);
5576}
5577
/**
 * Create a successor bitmap destined to replace this bitmap after an operation.
 * Requires that the bitmap is not frozen and has no successor.
 * Returns 0 on success; -1 with @errp set on failure.
 */
int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
                                       BdrvDirtyBitmap *bitmap, Error **errp)
{
    uint64_t granularity;
    BdrvDirtyBitmap *child;

    if (bdrv_dirty_bitmap_frozen(bitmap)) {
        error_setg(errp, "Cannot create a successor for a bitmap that is "
                   "currently frozen");
        return -1;
    }
    assert(!bitmap->successor);

    /* Create an anonymous successor */
    granularity = bdrv_dirty_bitmap_granularity(bitmap);
    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
    if (!child) {
        return -1;
    }

    /* Successor will be on or off based on our current state. */
    child->disabled = bitmap->disabled;

    /* Install the successor and freeze the parent */
    bitmap->successor = child;
    return 0;
}
5609
/**
 * For a bitmap with a successor, yield our name to the successor,
 * delete the old bitmap, and return a handle to the new bitmap.
 * Returns NULL with @errp set if no successor is installed.
 */
BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
                                            BdrvDirtyBitmap *bitmap,
                                            Error **errp)
{
    char *name;
    BdrvDirtyBitmap *successor = bitmap->successor;

    if (successor == NULL) {
        error_setg(errp, "Cannot relinquish control if "
                   "there's no successor present");
        return NULL;
    }

    /* Move (not copy) the name: the parent is about to be released and
     * must not free it. */
    name = bitmap->name;
    bitmap->name = NULL;
    successor->name = name;
    /* Clearing the successor un-freezes the parent so it may be released. */
    bitmap->successor = NULL;
    bdrv_release_dirty_bitmap(bs, bitmap);

    return successor;
}
5635
/**
 * In cases of failure where we can no longer safely delete the parent,
 * we may wish to re-join the parent and child/successor.
 * The merged parent will be un-frozen, but not explicitly re-enabled.
 * Returns the parent on success, NULL with @errp set on failure.
 */
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
                                           BdrvDirtyBitmap *parent,
                                           Error **errp)
{
    BdrvDirtyBitmap *successor = parent->successor;

    if (!successor) {
        error_setg(errp, "Cannot reclaim a successor when none is present");
        return NULL;
    }

    /* Fold bits recorded by the successor back into the parent. */
    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
        error_setg(errp, "Merging of parent and successor bitmap failed");
        return NULL;
    }
    bdrv_release_dirty_bitmap(bs, successor);
    parent->successor = NULL;

    return parent;
}
5661
Fam Zhenge4654d22013-11-13 18:29:43 +08005662void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5663{
5664 BdrvDirtyBitmap *bm, *next;
5665 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5666 if (bm == bitmap) {
John Snow9bd2b082015-04-17 19:49:57 -04005667 assert(!bdrv_dirty_bitmap_frozen(bm));
Fam Zhenge4654d22013-11-13 18:29:43 +08005668 QLIST_REMOVE(bitmap, list);
5669 hbitmap_free(bitmap->bitmap);
Fam Zheng0db6e542015-04-17 19:49:50 -04005670 g_free(bitmap->name);
Fam Zhenge4654d22013-11-13 18:29:43 +08005671 g_free(bitmap);
5672 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005673 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005674 }
5675}
5676
John Snowb8e6fb72015-04-17 19:49:56 -04005677void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5678{
John Snow9bd2b082015-04-17 19:49:57 -04005679 assert(!bdrv_dirty_bitmap_frozen(bitmap));
John Snowb8e6fb72015-04-17 19:49:56 -04005680 bitmap->disabled = true;
5681}
5682
5683void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5684{
John Snow9bd2b082015-04-17 19:49:57 -04005685 assert(!bdrv_dirty_bitmap_frozen(bitmap));
John Snowb8e6fb72015-04-17 19:49:56 -04005686 bitmap->disabled = false;
5687}
5688
Fam Zheng21b56832013-11-13 18:29:44 +08005689BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5690{
5691 BdrvDirtyBitmap *bm;
5692 BlockDirtyInfoList *list = NULL;
5693 BlockDirtyInfoList **plist = &list;
5694
5695 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
Markus Armbruster5839e532014-08-19 10:31:08 +02005696 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5697 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
Fam Zheng21b56832013-11-13 18:29:44 +08005698 info->count = bdrv_get_dirty_count(bs, bm);
John Snow592fdd02015-04-17 19:49:53 -04005699 info->granularity = bdrv_dirty_bitmap_granularity(bm);
Fam Zheng0db6e542015-04-17 19:49:50 -04005700 info->has_name = !!bm->name;
5701 info->name = g_strdup(bm->name);
John Snowa1135342015-04-17 19:50:00 -04005702 info->frozen = bdrv_dirty_bitmap_frozen(bm);
Fam Zheng21b56832013-11-13 18:29:44 +08005703 entry->value = info;
5704 *plist = entry;
5705 plist = &entry->next;
5706 }
5707
5708 return list;
5709}
5710
Fam Zhenge4654d22013-11-13 18:29:43 +08005711int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005712{
Fam Zhenge4654d22013-11-13 18:29:43 +08005713 if (bitmap) {
5714 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005715 } else {
5716 return 0;
5717 }
5718}
5719
John Snow341ebc22015-04-17 19:49:52 -04005720/**
5721 * Chooses a default granularity based on the existing cluster size,
5722 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
5723 * is no cluster size information available.
5724 */
5725uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
5726{
5727 BlockDriverInfo bdi;
5728 uint32_t granularity;
5729
5730 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
5731 granularity = MAX(4096, bdi.cluster_size);
5732 granularity = MIN(65536, granularity);
5733 } else {
5734 granularity = 65536;
5735 }
5736
5737 return granularity;
5738}
5739
John Snow592fdd02015-04-17 19:49:53 -04005740uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
5741{
5742 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
5743}
5744
Fam Zhenge4654d22013-11-13 18:29:43 +08005745void bdrv_dirty_iter_init(BlockDriverState *bs,
5746 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005747{
Fam Zhenge4654d22013-11-13 18:29:43 +08005748 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005749}
5750
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005751void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5752 int64_t cur_sector, int nr_sectors)
5753{
John Snowb8e6fb72015-04-17 19:49:56 -04005754 assert(bdrv_dirty_bitmap_enabled(bitmap));
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005755 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5756}
5757
5758void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5759 int64_t cur_sector, int nr_sectors)
5760{
John Snowb8e6fb72015-04-17 19:49:56 -04005761 assert(bdrv_dirty_bitmap_enabled(bitmap));
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005762 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5763}
5764
John Snowe74e6b72015-04-17 19:49:59 -04005765void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
5766{
5767 assert(bdrv_dirty_bitmap_enabled(bitmap));
5768 hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
5769}
5770
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005771static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5772 int nr_sectors)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005773{
Fam Zhenge4654d22013-11-13 18:29:43 +08005774 BdrvDirtyBitmap *bitmap;
5775 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
John Snowb8e6fb72015-04-17 19:49:56 -04005776 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5777 continue;
5778 }
Fam Zhenge4654d22013-11-13 18:29:43 +08005779 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005780 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005781}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005782
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005783static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
5784 int nr_sectors)
Fam Zhenge4654d22013-11-13 18:29:43 +08005785{
5786 BdrvDirtyBitmap *bitmap;
5787 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
John Snowb8e6fb72015-04-17 19:49:56 -04005788 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5789 continue;
5790 }
Fam Zhenge4654d22013-11-13 18:29:43 +08005791 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5792 }
5793}
5794
/**
 * Advance an HBitmapIter to an arbitrary offset.
 */
void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
{
    /* Re-initializing against the iterator's own hbitmap keeps the bitmap
     * association and only moves the position. */
    assert(hbi->hb);
    hbitmap_iter_init(hbi, hbi->hb, offset);
}
5803
Fam Zhenge4654d22013-11-13 18:29:43 +08005804int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5805{
5806 return hbitmap_count(bitmap->bitmap);
5807}
5808
/* Get a reference to bs; paired with bdrv_unref(). */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
5814
5815/* Release a previously grabbed reference to bs.
5816 * If after releasing, reference count is zero, the BlockDriverState is
5817 * deleted. */
5818void bdrv_unref(BlockDriverState *bs)
5819{
Jeff Cody9a4d5ca2014-07-23 17:22:57 -04005820 if (!bs) {
5821 return;
5822 }
Fam Zheng9fcb0252013-08-23 09:14:46 +08005823 assert(bs->refcnt > 0);
5824 if (--bs->refcnt == 0) {
5825 bdrv_delete(bs);
5826 }
5827}
5828
/* List node tying an Error "reason" to a blocked operation category;
 * one list per BlockOpType hangs off bs->op_blockers[]. */
struct BdrvOpBlocker {
    Error *reason;                      /* why the operation is blocked */
    QLIST_ENTRY(BdrvOpBlocker) list;
};
5833
5834bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5835{
5836 BdrvOpBlocker *blocker;
5837 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5838 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5839 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5840 if (errp) {
Alberto Garcia81e5f782015-04-08 12:29:19 +03005841 error_setg(errp, "Node '%s' is busy: %s",
5842 bdrv_get_device_or_node_name(bs),
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005843 error_get_pretty(blocker->reason));
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005844 }
5845 return true;
5846 }
5847 return false;
5848}
5849
5850void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5851{
5852 BdrvOpBlocker *blocker;
5853 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5854
Markus Armbruster5839e532014-08-19 10:31:08 +02005855 blocker = g_new0(BdrvOpBlocker, 1);
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005856 blocker->reason = reason;
5857 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5858}
5859
5860void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5861{
5862 BdrvOpBlocker *blocker, *next;
5863 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5864 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5865 if (blocker->reason == reason) {
5866 QLIST_REMOVE(blocker, list);
5867 g_free(blocker);
5868 }
5869 }
5870}
5871
5872void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5873{
5874 int i;
5875 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5876 bdrv_op_block(bs, i, reason);
5877 }
5878}
5879
5880void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5881{
5882 int i;
5883 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5884 bdrv_op_unblock(bs, i, reason);
5885 }
5886}
5887
5888bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5889{
5890 int i;
5891
5892 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5893 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5894 return false;
5895 }
5896 }
5897 return true;
5898}
5899
Luiz Capitulino28a72822011-09-26 17:43:50 -03005900void bdrv_iostatus_enable(BlockDriverState *bs)
5901{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005902 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005903 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005904}
5905
5906/* The I/O status is only enabled if the drive explicitly
5907 * enables it _and_ the VM is configured to stop on errors */
5908bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5909{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005910 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005911 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5912 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5913 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005914}
5915
/* Turn off I/O status tracking for @bs; the last status value is kept. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
5920
5921void bdrv_iostatus_reset(BlockDriverState *bs)
5922{
5923 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005924 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005925 if (bs->job) {
5926 block_job_iostatus_reset(bs->job);
5927 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005928 }
5929}
5930
Luiz Capitulino28a72822011-09-26 17:43:50 -03005931void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5932{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005933 assert(bdrv_iostatus_is_enabled(bs));
5934 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005935 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5936 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005937 }
5938}
5939
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005940void bdrv_img_create(const char *filename, const char *fmt,
5941 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005942 char *options, uint64_t img_size, int flags,
5943 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005944{
Chunyan Liu83d05212014-06-05 17:20:51 +08005945 QemuOptsList *create_opts = NULL;
5946 QemuOpts *opts = NULL;
5947 const char *backing_fmt, *backing_file;
5948 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005949 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005950 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005951 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005952 int ret = 0;
5953
5954 /* Find driver and parse its options */
5955 drv = bdrv_find_format(fmt);
5956 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005957 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005958 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005959 }
5960
Max Reitzb65a5e12015-02-05 13:58:12 -05005961 proto_drv = bdrv_find_protocol(filename, true, errp);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005962 if (!proto_drv) {
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005963 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005964 }
5965
Max Reitzc6149722014-12-02 18:32:45 +01005966 if (!drv->create_opts) {
5967 error_setg(errp, "Format driver '%s' does not support image creation",
5968 drv->format_name);
5969 return;
5970 }
5971
5972 if (!proto_drv->create_opts) {
5973 error_setg(errp, "Protocol driver '%s' does not support image creation",
5974 proto_drv->format_name);
5975 return;
5976 }
5977
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005978 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5979 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005980
5981 /* Create parameter list with default values */
Chunyan Liu83d05212014-06-05 17:20:51 +08005982 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01005983 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005984
5985 /* Parse -o options */
5986 if (options) {
Markus Armbrusterdc523cd342015-02-12 18:37:11 +01005987 qemu_opts_do_parse(opts, options, NULL, &local_err);
5988 if (local_err) {
5989 error_report_err(local_err);
5990 local_err = NULL;
Chunyan Liu83d05212014-06-05 17:20:51 +08005991 error_setg(errp, "Invalid options for file format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005992 goto out;
5993 }
5994 }
5995
5996 if (base_filename) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005997 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005998 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005999 error_setg(errp, "Backing file not supported for file format '%s'",
6000 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006001 goto out;
6002 }
6003 }
6004
6005 if (base_fmt) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01006006 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01006007 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02006008 error_setg(errp, "Backing file format not supported for file "
6009 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006010 goto out;
6011 }
6012 }
6013
Chunyan Liu83d05212014-06-05 17:20:51 +08006014 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
6015 if (backing_file) {
6016 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02006017 error_setg(errp, "Error: Trying to create an image with the "
6018 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01006019 goto out;
6020 }
6021 }
6022
Chunyan Liu83d05212014-06-05 17:20:51 +08006023 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
6024 if (backing_fmt) {
6025 backing_drv = bdrv_find_format(backing_fmt);
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00006026 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02006027 error_setg(errp, "Unknown backing file format '%s'",
Chunyan Liu83d05212014-06-05 17:20:51 +08006028 backing_fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006029 goto out;
6030 }
6031 }
6032
6033 // The size for the image must always be specified, with one exception:
6034 // If we are using a backing file, we can obtain the size from there
Chunyan Liu83d05212014-06-05 17:20:51 +08006035 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
6036 if (size == -1) {
6037 if (backing_file) {
Max Reitz66f6b812013-12-03 14:57:52 +01006038 BlockDriverState *bs;
Max Reitz29168012014-11-26 17:20:27 +01006039 char *full_backing = g_new0(char, PATH_MAX);
Markus Armbruster52bf1e72014-06-26 13:23:25 +02006040 int64_t size;
Paolo Bonzini63090da2012-04-12 14:01:03 +02006041 int back_flags;
6042
Max Reitz29168012014-11-26 17:20:27 +01006043 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
6044 full_backing, PATH_MAX,
6045 &local_err);
6046 if (local_err) {
6047 g_free(full_backing);
6048 goto out;
6049 }
6050
Paolo Bonzini63090da2012-04-12 14:01:03 +02006051 /* backing files always opened read-only */
6052 back_flags =
6053 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006054
Max Reitzf67503e2014-02-18 18:33:05 +01006055 bs = NULL;
Max Reitz29168012014-11-26 17:20:27 +01006056 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02006057 backing_drv, &local_err);
Max Reitz29168012014-11-26 17:20:27 +01006058 g_free(full_backing);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006059 if (ret < 0) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006060 goto out;
6061 }
Markus Armbruster52bf1e72014-06-26 13:23:25 +02006062 size = bdrv_getlength(bs);
6063 if (size < 0) {
6064 error_setg_errno(errp, -size, "Could not get size of '%s'",
6065 backing_file);
6066 bdrv_unref(bs);
6067 goto out;
6068 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006069
Markus Armbruster39101f22015-02-12 16:46:36 +01006070 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
Max Reitz66f6b812013-12-03 14:57:52 +01006071
6072 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006073 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02006074 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006075 goto out;
6076 }
6077 }
6078
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01006079 if (!quiet) {
Fam Zheng43c5d8f2014-12-09 15:38:04 +08006080 printf("Formatting '%s', fmt=%s", filename, fmt);
6081 qemu_opts_print(opts, " ");
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01006082 puts("");
6083 }
Chunyan Liu83d05212014-06-05 17:20:51 +08006084
Chunyan Liuc282e1f2014-06-05 17:21:11 +08006085 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08006086
Max Reitzcc84d902013-09-06 17:14:26 +02006087 if (ret == -EFBIG) {
6088 /* This is generally a better message than whatever the driver would
6089 * deliver (especially because of the cluster_size_hint), since that
6090 * is most probably not much different from "image too large". */
6091 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08006092 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02006093 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006094 }
Max Reitzcc84d902013-09-06 17:14:26 +02006095 error_setg(errp, "The image size is too large for file format '%s'"
6096 "%s", fmt, cluster_size_hint);
6097 error_free(local_err);
6098 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006099 }
6100
6101out:
Chunyan Liu83d05212014-06-05 17:20:51 +08006102 qemu_opts_del(opts);
6103 qemu_opts_free(create_opts);
Markus Armbruster84d18f02014-01-30 15:07:28 +01006104 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02006105 error_propagate(errp, local_err);
6106 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01006107}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01006108
/* Return the AioContext that @bs is currently attached to. */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
6113
/* Detach @bs (and, recursively, its file and backing children) from its
 * current AioContext.
 *
 * The steps deliberately mirror bdrv_attach_aio_context() in reverse
 * order: user notifiers first, then throttling, the driver hook, and
 * finally the children.  Afterwards bs->aio_context is NULL until
 * bdrv_attach_aio_context() is called again.
 */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf;

    /* A BDS without a driver (e.g. closed/ejected) has nothing to detach */
    if (!bs->drv) {
        return;
    }

    /* Let interested parties (e.g. device models) drop their context refs */
    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
        baf->detach_aio_context(baf->opaque);
    }

    if (bs->io_limits_enabled) {
        throttle_detach_aio_context(&bs->throttle_state);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file);
    }
    if (bs->backing_hd) {
        bdrv_detach_aio_context(bs->backing_hd);
    }

    bs->aio_context = NULL;
}
6141
/* Attach @bs (and, recursively, its backing and file children) to
 * @new_context.
 *
 * The steps are the exact reverse of bdrv_detach_aio_context():
 * children first, then the driver hook, throttling, and user notifiers
 * last, so notifier callbacks observe a fully attached node.
 */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *ban;

    /* A BDS without a driver (e.g. closed/ejected) has nothing to attach */
    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing_hd) {
        bdrv_attach_aio_context(bs->backing_hd, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    if (bs->io_limits_enabled) {
        throttle_attach_aio_context(&bs->throttle_state, new_context);
    }

    /* Finally tell registered listeners about the new context */
    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
        ban->attached_aio_context(new_context, ban->opaque);
    }
}
6170
/* Move @bs to @new_context: drain all pending I/O, detach from the old
 * context, then attach to the new one under its lock.
 *
 * Must be called from the old AioContext's thread (or with that context
 * otherwise quiesced -- the drain takes care of in-flight requests).
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02006184
Max Reitz33384422014-06-20 21:57:33 +02006185void bdrv_add_aio_context_notifier(BlockDriverState *bs,
6186 void (*attached_aio_context)(AioContext *new_context, void *opaque),
6187 void (*detach_aio_context)(void *opaque), void *opaque)
6188{
6189 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
6190 *ban = (BdrvAioNotifier){
6191 .attached_aio_context = attached_aio_context,
6192 .detach_aio_context = detach_aio_context,
6193 .opaque = opaque
6194 };
6195
6196 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
6197}
6198
6199void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
6200 void (*attached_aio_context)(AioContext *,
6201 void *),
6202 void (*detach_aio_context)(void *),
6203 void *opaque)
6204{
6205 BdrvAioNotifier *ban, *ban_next;
6206
6207 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
6208 if (ban->attached_aio_context == attached_aio_context &&
6209 ban->detach_aio_context == detach_aio_context &&
6210 ban->opaque == opaque)
6211 {
6212 QLIST_REMOVE(ban, list);
6213 g_free(ban);
6214
6215 return;
6216 }
6217 }
6218
6219 abort();
6220}
6221
Stefan Hajnoczid616b222013-06-24 17:13:10 +02006222void bdrv_add_before_write_notifier(BlockDriverState *bs,
6223 NotifierWithReturn *notifier)
6224{
6225 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
6226}
Max Reitz6f176b42013-09-03 10:09:50 +02006227
Max Reitz77485432014-10-27 11:12:50 +01006228int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
6229 BlockDriverAmendStatusCB *status_cb)
Max Reitz6f176b42013-09-03 10:09:50 +02006230{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08006231 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02006232 return -ENOTSUP;
6233 }
Max Reitz77485432014-10-27 11:12:50 +01006234 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
Max Reitz6f176b42013-09-03 10:09:50 +02006235}
Benoît Canetf6186f42013-10-02 14:33:48 +02006236
Benoît Canetb5042a32014-03-03 19:11:34 +01006237/* This function will be called by the bdrv_recurse_is_first_non_filter method
6238 * of block filter and by bdrv_is_first_non_filter.
6239 * It is used to test if the given bs is the candidate or recurse more in the
6240 * node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01006241 */
Benoît Canet212a5a82014-01-23 21:31:36 +01006242bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
6243 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02006244{
Benoît Canetb5042a32014-03-03 19:11:34 +01006245 /* return false if basic checks fails */
6246 if (!bs || !bs->drv) {
6247 return false;
6248 }
6249
6250 /* the code reached a non block filter driver -> check if the bs is
6251 * the same as the candidate. It's the recursion termination condition.
6252 */
6253 if (!bs->drv->is_filter) {
6254 return bs == candidate;
6255 }
6256 /* Down this path the driver is a block filter driver */
6257
6258 /* If the block filter recursion method is defined use it to recurse down
6259 * the node graph.
6260 */
6261 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01006262 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
6263 }
6264
Benoît Canetb5042a32014-03-03 19:11:34 +01006265 /* the driver is a block filter but don't allow to recurse -> return false
6266 */
6267 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01006268}
6269
6270/* This function checks if the candidate is the first non filter bs down it's
6271 * bs chain. Since we don't have pointers to parents it explore all bs chains
6272 * from the top. Some filters can choose not to pass down the recursion.
6273 */
6274bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6275{
6276 BlockDriverState *bs;
6277
6278 /* walk down the bs forest recursively */
6279 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6280 bool perm;
6281
Benoît Canetb5042a32014-03-03 19:11:34 +01006282 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01006283 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01006284
6285 /* candidate is the first non filter */
6286 if (perm) {
6287 return true;
6288 }
6289 }
6290
6291 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02006292}
Benoît Canet09158f02014-06-27 18:25:25 +02006293
6294BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
6295{
6296 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006297 AioContext *aio_context;
6298
Benoît Canet09158f02014-06-27 18:25:25 +02006299 if (!to_replace_bs) {
6300 error_setg(errp, "Node name '%s' not found", node_name);
6301 return NULL;
6302 }
6303
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006304 aio_context = bdrv_get_aio_context(to_replace_bs);
6305 aio_context_acquire(aio_context);
6306
Benoît Canet09158f02014-06-27 18:25:25 +02006307 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006308 to_replace_bs = NULL;
6309 goto out;
Benoît Canet09158f02014-06-27 18:25:25 +02006310 }
6311
6312 /* We don't want arbitrary node of the BDS chain to be replaced only the top
6313 * most non filter in order to prevent data corruption.
6314 * Another benefit is that this tests exclude backing files which are
6315 * blocked by the backing blockers.
6316 */
6317 if (!bdrv_is_first_non_filter(to_replace_bs)) {
6318 error_setg(errp, "Only top most non filter can be replaced");
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006319 to_replace_bs = NULL;
6320 goto out;
Benoît Canet09158f02014-06-27 18:25:25 +02006321 }
6322
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006323out:
6324 aio_context_release(aio_context);
Benoît Canet09158f02014-06-27 18:25:25 +02006325 return to_replace_bs;
6326}
Ming Lei448ad912014-07-04 18:04:33 +08006327
6328void bdrv_io_plug(BlockDriverState *bs)
6329{
6330 BlockDriver *drv = bs->drv;
6331 if (drv && drv->bdrv_io_plug) {
6332 drv->bdrv_io_plug(bs);
6333 } else if (bs->file) {
6334 bdrv_io_plug(bs->file);
6335 }
6336}
6337
6338void bdrv_io_unplug(BlockDriverState *bs)
6339{
6340 BlockDriver *drv = bs->drv;
6341 if (drv && drv->bdrv_io_unplug) {
6342 drv->bdrv_io_unplug(bs);
6343 } else if (bs->file) {
6344 bdrv_io_unplug(bs->file);
6345 }
6346}
6347
6348void bdrv_flush_io_queue(BlockDriverState *bs)
6349{
6350 BlockDriver *drv = bs->drv;
6351 if (drv && drv->bdrv_flush_io_queue) {
6352 drv->bdrv_flush_io_queue(bs);
6353 } else if (bs->file) {
6354 bdrv_flush_io_queue(bs->file);
6355 }
6356}
Max Reitz91af7012014-07-18 20:24:56 +02006357
6358static bool append_open_options(QDict *d, BlockDriverState *bs)
6359{
6360 const QDictEntry *entry;
6361 bool found_any = false;
6362
6363 for (entry = qdict_first(bs->options); entry;
6364 entry = qdict_next(bs->options, entry))
6365 {
6366 /* Only take options for this level and exclude all non-driver-specific
6367 * options */
6368 if (!strchr(qdict_entry_key(entry), '.') &&
6369 strcmp(qdict_entry_key(entry), "node-name"))
6370 {
6371 qobject_incref(qdict_entry_value(entry));
6372 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
6373 found_any = true;
6374 }
6375 }
6376
6377 return found_any;
6378}
6379
/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
 */
void bdrv_refresh_filename(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    QDict *opts;

    if (!drv) {
        return;
    }

    /* This BDS's file name will most probably depend on its file's name, so
     * refresh that first */
    if (bs->file) {
        bdrv_refresh_filename(bs->file);
    }

    if (drv->bdrv_refresh_filename) {
        /* Obsolete information is of no use here, so drop the old file name
         * information before refreshing it */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        /* The driver fills in exact_filename/full_open_options itself */
        drv->bdrv_refresh_filename(bs);
    } else if (bs->file) {
        /* Try to reconstruct valid information from the underlying file */
        bool has_open_options;

        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        opts = qdict_new();
        has_open_options = append_open_options(opts, bs);

        /* If no specific options have been given for this BDS, the filename of
         * the underlying file should suffice for this one as well */
        if (bs->file->exact_filename[0] && !has_open_options) {
            strcpy(bs->exact_filename, bs->file->exact_filename);
        }
        /* Reconstructing the full options QDict is simple for most format block
         * drivers, as long as the full options are known for the underlying
         * file BDS. The full options QDict of that file BDS should somehow
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->full_open_options) {
            qdict_put_obj(opts, "driver",
                          QOBJECT(qstring_from_str(drv->format_name)));
            QINCREF(bs->file->full_open_options);
            qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));

            /* opts ownership is transferred to the BDS */
            bs->full_open_options = opts;
        } else {
            QDECREF(opts);
        }
    } else if (!bs->full_open_options && qdict_size(bs->options)) {
        /* There is no underlying file BDS (at least referenced by BDS.file),
         * so the full options QDict should be equal to the options given
         * specifically for this block device when it was opened (plus the
         * driver specification).
         * Because those options don't change, there is no need to update
         * full_open_options when it's already set. */

        opts = qdict_new();
        append_open_options(opts, bs);
        qdict_put_obj(opts, "driver",
                      QOBJECT(qstring_from_str(drv->format_name)));

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
             * require this filename to be parsed), but we have to find some
             * default solution here, so just include it. If some block driver
             * does not support pure options without any filename at all or
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
            qdict_put_obj(opts, "filename",
                          QOBJECT(qstring_from_str(bs->exact_filename)));
        }

        bs->full_open_options = opts;
    }

    /* Finally derive the user-visible filename from what we computed */
    if (bs->exact_filename[0]) {
        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
    } else if (bs->full_open_options) {
        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
                 qstring_get_str(json));
        QDECREF(json);
    }
}
Benoît Canet5366d0c2014-09-05 15:46:18 +02006487
/* This accessor function purpose is to allow the device models to access the
 * BlockAcctStats structure embedded inside a BlockDriverState without being
 * aware of the BlockDriverState structure layout.
 * It will go away when the BlockAcctStats structure will be moved inside
 * the device models.
 */
BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
{
    /* returns a pointer into @bs; lifetime is tied to the BDS */
    return &bs->stats;
}