blob: 9dc5c8cb1a752e12c16f7d8bdcbc367f731fdc19 [file] [log] [blame]
/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010027#include "block/block_int.h"
28#include "block/blockjob.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010029#include "qemu/module.h"
Paolo Bonzini7b1b5d12012-12-17 18:19:43 +010030#include "qapi/qmp/qjson.h"
Markus Armbrusterbfb197e2014-10-07 13:59:11 +020031#include "sysemu/block-backend.h"
Paolo Bonzini9c17d612012-12-17 18:20:04 +010032#include "sysemu/sysemu.h"
Fam Zhengde50a202015-03-25 15:27:26 +080033#include "sysemu/qtest.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010034#include "qemu/notify.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010035#include "block/coroutine.h"
Benoît Canetc13163f2014-01-23 21:31:34 +010036#include "block/qapi.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030037#include "qmp-commands.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010038#include "qemu/timer.h"
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +020039#include "qapi-event.h"
bellardfc01f7e2003-06-30 10:03:06 +000040
Juan Quintela71e72a12009-07-27 16:12:56 +020041#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000042#include <sys/types.h>
43#include <sys/stat.h>
44#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000045#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000046#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000047#include <sys/disk.h>
48#endif
blueswir1c5e97232009-03-07 20:06:23 +000049#endif
bellard7674e7b2005-04-26 21:59:26 +000050
aliguori49dc7682009-03-08 16:26:59 +000051#ifdef _WIN32
52#include <windows.h>
53#endif
54
/**
 * A BdrvDirtyBitmap can be in three possible states:
 * (1) successor is NULL and disabled is false: full r/w mode
 * (2) successor is NULL and disabled is true: read only mode ("disabled")
 * (3) successor is set: frozen mode.
 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
 * or enabled. A frozen bitmap can only abdicate() or reclaim().
 */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;            /* underlying hierarchical bitmap of sectors */
    BdrvDirtyBitmap *successor; /* non-NULL implies the frozen state (3) */
    char *name;                 /* bitmap name; NULL means anonymous */
    bool disabled;              /* true: read-only state (2) */
    QLIST_ENTRY(BdrvDirtyBitmap) list; /* entry in the BDS's dirty_bitmaps */
};
70
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010071#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
72
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020073static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000074 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020075 BlockCompletionFunc *cb, void *opaque);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020076static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000077 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020078 BlockCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020079static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
80 int64_t sector_num, int nb_sectors,
81 QEMUIOVector *iov);
82static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
83 int64_t sector_num, int nb_sectors,
84 QEMUIOVector *iov);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010085static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
86 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000087 BdrvRequestFlags flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010088static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
89 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000090 BdrvRequestFlags flags);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020091static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
92 int64_t sector_num,
93 QEMUIOVector *qiov,
94 int nb_sectors,
95 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +020096 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020097 void *opaque,
98 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010099static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +0100100static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +0200101 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellardec530c82006-04-25 22:36:06 +0000102
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100103static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
104 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +0000105
Benoît Canetdc364f42014-01-23 21:31:32 +0100106static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
107 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
108
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100109static QLIST_HEAD(, BlockDriver) bdrv_drivers =
110 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +0000111
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +0300112static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
113 int nr_sectors);
114static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
115 int nr_sectors);
Markus Armbrustereb852012009-10-27 18:41:44 +0100116/* If non-zero, use only whitelisted block drivers */
117static int use_bdrv_whitelist;
118
#ifdef _WIN32
/* Return non-zero if @filename begins with a drive-letter spec like "c:" */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

/* Return non-zero if @filename names a whole drive ("c:") or a Windows
 * device path ("\\.\..." or "//./..."). */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif
138
/* throttling disk I/O limits */
/* Apply a new throttle configuration to @bs, then kick both queues
 * (reads and writes) so requests queued under the old limits get a
 * chance to run under the new ones. */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    /* index 0 is the read queue, index 1 the write queue */
    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}
151
/* This function drains all the throttled I/O requests.
 * Returns true if at least one queued request was restarted. */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    /* Temporarily lift the limit so restarted requests are not
     * immediately re-queued for throttling. */
    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}
171
/* Disable I/O throttling on @bs: release any queued requests and tear
 * down the throttle state (timers included). */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    /* flush requests still waiting in the throttled queues */
    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}
180
/* Throttle timer callback: restart the next queued read request. */
static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}
186
/* Throttle timer callback: restart the next queued write request. */
static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}
192
/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    int clock_type = QEMU_CLOCK_REALTIME;

    if (qtest_enabled()) {
        /* For testing block IO throttling only */
        clock_type = QEMU_CLOCK_VIRTUAL;
    }
    /* enabling twice would leak the previously initialized state */
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  clock_type,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}
211
/* This function makes an I/O request wait if needed
 *
 * @bytes: the number of bytes of the I/O
 * @is_write: is the I/O a write (selects the write queue/stats)
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* must this I/O wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already queued,
     * queue this I/O too (preserves FIFO ordering) */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the I/O will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);


    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}
242
/* Return the optimal memory alignment for data buffers used with @bs;
 * falls back to a conservative default when no driver is attached. */
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}
252
/* Check whether the path starts with a "<protocol>:" prefix.
 * A ':' only counts if it appears before any path separator, and
 * Windows drive specs ("c:", "c:\foo") are not protocols. */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *sep;

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    sep = path + strcspn(path, ":/\\");
#else
    const char *sep = path + strcspn(path, ":/");
#endif

    return sep[0] == ':';
}
270
/* Return non-zero if @path is absolute for the host's path syntax. */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return *path == '/' || *path == '\\';
#else
    return *path == '/';
#endif
}
283
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: first char after an optional "<protocol>:" prefix */
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        /* p1: start of the last path component of base_path */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            /* also treat '\\' as a path separator on Windows */
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        /* copy the directory part of base_path, then append filename */
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
327
/* Resolve @backing (a backing-file name) against @backed (the image that
 * references it) into @dest (size @sz).  Absolute names and names with a
 * protocol prefix are copied verbatim; relative names are combined with
 * @backed's path.  Fails when @backed is empty or a "json:" pseudo-filename,
 * since no base path can be derived from it. */
void bdrv_get_full_backing_filename_from_filename(const char *backed,
                                                  const char *backing,
                                                  char *dest, size_t sz,
                                                  Error **errp)
{
    if (backing[0] == '\0' || path_has_protocol(backing) ||
        path_is_absolute(backing))
    {
        pstrcpy(dest, sz, backing);
    } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
        error_setg(errp, "Cannot use relative backing file names for '%s'",
                   backed);
    } else {
        path_combine(dest, sz, backed, backing);
    }
}
344
/* Resolve @bs's backing file name into @dest, preferring the BDS's
 * exact_filename over its (possibly synthetic) filename as the base. */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
                                    Error **errp)
{
    char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;

    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
                                                 dest, sz, errp);
}
353
/* Register a block driver, filling in emulated coroutine/AIO callbacks
 * for drivers that only implement one of the interfaces. */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellardb3380822004-03-14 21:38:54 +0000373
/* Create a new BDS and link it into the global bdrv_states list
 * (i.e. make it a top-level, device-visible BDS). */
BlockDriverState *bdrv_new_root(void)
{
    BlockDriverState *bs = bdrv_new();

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}
381
/* Allocate and initialize an anonymous BlockDriverState with a single
 * reference, empty notifier/queue/blocker lists, and the main AioContext. */
BlockDriverState *bdrv_new(void)
{
    BlockDriverState *bs;
    int i;

    bs = g_new0(BlockDriverState, 1);
    QLIST_INIT(&bs->dirty_bitmaps);
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;          /* caller owns the initial reference */
    bs->aio_context = qemu_get_aio_context();

    return bs;
}
402
/* Register @notify to be called when @bs is closed. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}
407
/* Look up a registered block driver by format name; NULL if not found. */
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}
418
/* Return 1 if @drv may be used (r/w, or r/o when @read_only) under the
 * configure-time driver whitelists; an empty whitelist allows everything. */
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1; /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    /* r/o-only drivers are acceptable for read-only use */
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}
447
/* Like bdrv_find_format(), but return NULL if the driver exists yet is
 * not whitelisted for the requested access mode. */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}
454
/* Shared state between bdrv_create() and its coroutine entry point. */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create callback is invoked */
    char *filename;     /* duplicated in bdrv_create(), freed there */
    QemuOpts *opts;     /* creation options (not owned) */
    int ret;            /* result; NOT_DONE while the coroutine runs */
    Error *err;         /* error details propagated from the driver */
} CreateCo;
462
/* Coroutine entry point for bdrv_create(): run the driver's create
 * callback and store result/error in the shared CreateCo. */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    /* setting ret != NOT_DONE signals completion to the poll loop */
    cco->ret = ret;
}
477
/* Create an image @filename with driver @drv and options @opts.
 * Runs the driver callback in a coroutine; when called outside coroutine
 * context, polls the main AioContext until it completes.
 * Returns 0 on success or a negative errno; sets @errp on failure. */
int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        /* wait for the coroutine to finish */
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            /* driver reported failure without error detail */
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}
522
/* Create a protocol-level image for @filename, choosing the driver from
 * the filename's protocol prefix (or the host-device probes). */
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true, errp);
    if (drv == NULL) {
        /* errp already set by bdrv_find_protocol() */
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}
540
/* Recompute @bs's BlockLimits: start from zeroed limits, recursively
 * merge in the limits of bs->file and bs->backing_hd, then let the
 * driver's own bdrv_refresh_limits override the result. */
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        /* no protocol child: assume sector-sized memory alignment */
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        /* optimum: take the larger; maximum: take the stricter non-zero */
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.max_transfer_length =
            MIN_NON_ZERO(bs->bl.max_transfer_length,
                         bs->backing_hd->bl.max_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}
588
/**
 * Try to get @bs's logical and physical block size.
 * On success, store them in @bsz struct and return 0.
 * On failure return -errno (-ENOTSUP when the driver lacks the hook).
 * @bs must not be empty.
 */
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_probe_blocksizes) {
        return drv->bdrv_probe_blocksizes(bs, bsz);
    }

    return -ENOTSUP;
}
605
/**
 * Try to get @bs's geometry (cyls, heads, sectors).
 * On success, store them in @geo struct and return 0.
 * On failure return -errno (-ENOTSUP when the driver lacks the hook).
 * @bs must not be empty.
 */
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_probe_geometry) {
        return drv->bdrv_probe_geometry(bs, geo);
    }

    return -ENOTSUP;
}
622
/*
 * Create a uniquely-named empty temporary file and store its path in
 * @filename (buffer of @size bytes).
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    if (GetTempPath(MAX_PATH, temp_dir)
        && GetTempFileName(temp_dir, "qem", 0, filename)) {
        return 0;
    }
    return -GetLastError();
#else
    const char *dir = getenv("TMPDIR");
    int fd;

    if (dir == NULL) {
        dir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", dir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}
bellardea2384d2004-08-01 21:59:26 +0000658
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 * Returns the driver whose bdrv_probe_device() scored highest for
 * @filename, or NULL if no driver claimed it.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200680
/* Select the protocol driver for @filename: host-device probes first,
 * then the "<protocol>:" prefix (when @allow_protocol_prefix), else the
 * plain file driver.  Returns NULL and sets @errp for an unknown protocol. */
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix,
                                Error **errp)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return &bdrv_file;
    }

    /* extract the "<protocol>" part, truncated to the local buffer */
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }

    error_setg(errp, "Unknown protocol '%s'", protocol);
    return NULL;
}
725
Markus Armbrusterc6684242014-11-20 16:27:10 +0100726/*
727 * Guess image format by probing its contents.
728 * This is not a good idea when your image is raw (CVE-2008-2004), but
729 * we do it anyway for backward compatibility.
730 *
731 * @buf contains the image's first @buf_size bytes.
Kevin Wolf7cddd372014-11-20 16:27:11 +0100732 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
733 * but can be smaller if the image file is smaller)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100734 * @filename is its filename.
735 *
736 * For all block drivers, call the bdrv_probe() method to get its
737 * probing score.
738 * Return the first block driver with the highest probing score.
739 */
Kevin Wolf38f3ef52014-11-20 16:27:12 +0100740BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
741 const char *filename)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100742{
743 int score_max = 0, score;
744 BlockDriver *drv = NULL, *d;
745
746 QLIST_FOREACH(d, &bdrv_drivers, list) {
747 if (d->bdrv_probe) {
748 score = d->bdrv_probe(buf, buf_size, filename);
749 if (score > score_max) {
750 score_max = score;
751 drv = d;
752 }
753 }
754 }
755
756 return drv;
757}
758
Kevin Wolff500a6d2012-11-12 17:35:27 +0100759static int find_image_format(BlockDriverState *bs, const char *filename,
Max Reitz34b5d2c2013-09-05 14:45:29 +0200760 BlockDriver **pdrv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000761{
Markus Armbrusterc6684242014-11-20 16:27:10 +0100762 BlockDriver *drv;
Kevin Wolf7cddd372014-11-20 16:27:11 +0100763 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
Kevin Wolff500a6d2012-11-12 17:35:27 +0100764 int ret = 0;
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700765
Kevin Wolf08a00552010-06-01 18:37:31 +0200766 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
Paolo Bonzini8e895592013-01-10 15:39:27 +0100767 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
Max Reitzef810432014-12-02 18:32:42 +0100768 *pdrv = &bdrv_raw;
Stefan Weilc98ac352010-07-21 21:51:51 +0200769 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700770 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700771
bellard83f64092006-08-01 16:21:11 +0000772 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
bellard83f64092006-08-01 16:21:11 +0000773 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200774 error_setg_errno(errp, -ret, "Could not read image for determining its "
775 "format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200776 *pdrv = NULL;
777 return ret;
bellard83f64092006-08-01 16:21:11 +0000778 }
779
Markus Armbrusterc6684242014-11-20 16:27:10 +0100780 drv = bdrv_probe_all(buf, ret, filename);
Stefan Weilc98ac352010-07-21 21:51:51 +0200781 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200782 error_setg(errp, "Could not determine image format: No compatible "
783 "driver found");
Stefan Weilc98ac352010-07-21 21:51:51 +0200784 ret = -ENOENT;
785 }
786 *pdrv = drv;
787 return ret;
bellardea2384d2004-08-01 21:59:26 +0000788}
789
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100790/**
791 * Set the current 'total_sectors' value
Markus Armbruster65a9bb22014-06-26 13:23:17 +0200792 * Return 0 on success, -errno on error.
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100793 */
794static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
795{
796 BlockDriver *drv = bs->drv;
797
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700798 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
799 if (bs->sg)
800 return 0;
801
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100802 /* query actual device if possible, otherwise just trust the hint */
803 if (drv->bdrv_getlength) {
804 int64_t length = drv->bdrv_getlength(bs);
805 if (length < 0) {
806 return length;
807 }
Fam Zheng7e382002013-11-06 19:48:06 +0800808 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100809 }
810
811 bs->total_sectors = hint;
812 return 0;
813}
814
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100815/**
Paolo Bonzini9e8f1832013-02-08 14:06:11 +0100816 * Set open flags for a given discard mode
817 *
818 * Return 0 on success, -1 if the discard mode was invalid.
819 */
820int bdrv_parse_discard_flags(const char *mode, int *flags)
821{
822 *flags &= ~BDRV_O_UNMAP;
823
824 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
825 /* do nothing */
826 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
827 *flags |= BDRV_O_UNMAP;
828 } else {
829 return -1;
830 }
831
832 return 0;
833}
834
835/**
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100836 * Set open flags for a given cache mode
837 *
838 * Return 0 on success, -1 if the cache mode was invalid.
839 */
840int bdrv_parse_cache_flags(const char *mode, int *flags)
841{
842 *flags &= ~BDRV_O_CACHE_MASK;
843
844 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
845 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100846 } else if (!strcmp(mode, "directsync")) {
847 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100848 } else if (!strcmp(mode, "writeback")) {
849 *flags |= BDRV_O_CACHE_WB;
850 } else if (!strcmp(mode, "unsafe")) {
851 *flags |= BDRV_O_CACHE_WB;
852 *flags |= BDRV_O_NO_FLUSH;
853 } else if (!strcmp(mode, "writethrough")) {
854 /* this is the default */
855 } else {
856 return -1;
857 }
858
859 return 0;
860}
861
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000862/**
863 * The copy-on-read flag is actually a reference count so multiple users may
864 * use the feature without worrying about clobbering its previous state.
865 * Copy-on-read stays enabled until all users have called to disable it.
866 */
867void bdrv_enable_copy_on_read(BlockDriverState *bs)
868{
869 bs->copy_on_read++;
870}
871
872void bdrv_disable_copy_on_read(BlockDriverState *bs)
873{
874 assert(bs->copy_on_read > 0);
875 bs->copy_on_read--;
876}
877
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200878/*
Kevin Wolfb1e6fc02014-05-06 12:11:42 +0200879 * Returns the flags that a temporary snapshot should get, based on the
880 * originally requested flags (the originally requested image will have flags
881 * like a backing file)
882 */
883static int bdrv_temp_snapshot_flags(int flags)
884{
885 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
886}
887
888/*
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200889 * Returns the flags that bs->file should get, based on the given flags for
890 * the parent BDS
891 */
892static int bdrv_inherited_flags(int flags)
893{
894 /* Enable protocol handling, disable format probing for bs->file */
895 flags |= BDRV_O_PROTOCOL;
896
897 /* Our block drivers take care to send flushes and respect unmap policy,
898 * so we can enable both unconditionally on lower layers. */
899 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
900
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200901 /* Clear flags that only apply to the top layer */
Kevin Wolf5669b442014-04-11 21:36:45 +0200902 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200903
904 return flags;
905}
906
Kevin Wolf317fc442014-04-25 13:27:34 +0200907/*
908 * Returns the flags that bs->backing_hd should get, based on the given flags
909 * for the parent BDS
910 */
911static int bdrv_backing_flags(int flags)
912{
913 /* backing files always opened read-only */
914 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
915
916 /* snapshot=on is handled on the top layer */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200917 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
Kevin Wolf317fc442014-04-25 13:27:34 +0200918
919 return flags;
920}
921
Kevin Wolf7b272452012-11-12 17:05:39 +0100922static int bdrv_open_flags(BlockDriverState *bs, int flags)
923{
924 int open_flags = flags | BDRV_O_CACHE_WB;
925
926 /*
927 * Clear flags that are internal to the block layer before opening the
928 * image.
929 */
Kevin Wolf20cca272014-06-04 14:33:27 +0200930 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
Kevin Wolf7b272452012-11-12 17:05:39 +0100931
932 /*
933 * Snapshots should be writable.
934 */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200935 if (flags & BDRV_O_TEMPORARY) {
Kevin Wolf7b272452012-11-12 17:05:39 +0100936 open_flags |= BDRV_O_RDWR;
937 }
938
939 return open_flags;
940}
941
Kevin Wolf636ea372014-01-24 14:11:52 +0100942static void bdrv_assign_node_name(BlockDriverState *bs,
943 const char *node_name,
944 Error **errp)
Benoît Canet6913c0c2014-01-23 21:31:33 +0100945{
946 if (!node_name) {
Kevin Wolf636ea372014-01-24 14:11:52 +0100947 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100948 }
949
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200950 /* Check for empty string or invalid characters */
Markus Armbrusterf5bebbb2014-09-30 13:59:30 +0200951 if (!id_wellformed(node_name)) {
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200952 error_setg(errp, "Invalid node name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100953 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100954 }
955
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100956 /* takes care of avoiding namespaces collisions */
Markus Armbruster7f06d472014-10-07 13:59:12 +0200957 if (blk_by_name(node_name)) {
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100958 error_setg(errp, "node-name=%s is conflicting with a device id",
959 node_name);
Kevin Wolf636ea372014-01-24 14:11:52 +0100960 return;
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100961 }
962
Benoît Canet6913c0c2014-01-23 21:31:33 +0100963 /* takes care of avoiding duplicates node names */
964 if (bdrv_find_node(node_name)) {
965 error_setg(errp, "Duplicate node name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100966 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100967 }
968
969 /* copy node name into the bs and insert it into the graph list */
970 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
971 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
Benoît Canet6913c0c2014-01-23 21:31:33 +0100972}
973
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs: the (new) BDS being opened; must not yet have a file child
 * @file: already-opened protocol layer, or NULL when @drv itself is a
 *        protocol driver that will open the file
 * @options: remaining open options; "node-name" is consumed here, the rest
 *           is handed to the driver's open callback
 * @flags: BDRV_O_* flags as requested by the caller
 * @drv: the driver to open @bs with (must be non-NULL)
 *
 * Returns 0 on success, negative errno with @errp set on failure. On
 * failure after the driver was attached, bs->drv/bs->opaque/bs->file are
 * reset to NULL (see free_and_fail).
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Prefer the protocol layer's filename; fall back to the options dict */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* Validate and consume the "node-name" option before anything else */
    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Whitelisting may allow a driver read-only but not read-write */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                   ? "Driver '%s' can only be used for read-only devices"
                   : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* Attach the driver and allocate its per-BDS private state */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's error; synthesize one from errno otherwise */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    /* Undo the driver attachment; the caller owns @file and cleans it up */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
1117
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02001118static QDict *parse_json_filename(const char *filename, Error **errp)
1119{
1120 QObject *options_obj;
1121 QDict *options;
1122 int ret;
1123
1124 ret = strstart(filename, "json:", &filename);
1125 assert(ret);
1126
1127 options_obj = qobject_from_json(filename);
1128 if (!options_obj) {
1129 error_setg(errp, "Could not parse the JSON options");
1130 return NULL;
1131 }
1132
1133 if (qobject_type(options_obj) != QTYPE_QDICT) {
1134 qobject_decref(options_obj);
1135 error_setg(errp, "Invalid JSON object given");
1136 return NULL;
1137 }
1138
1139 options = qobject_to_qdict(options_obj);
1140 qdict_flatten(options);
1141
1142 return options;
1143}
1144
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 *
 * @options: in/out options dict; "filename" and "driver" entries may be
 *           added, and json: options are merged in with lower priority
 * @pfilename: in/out filename; set to NULL when consumed by json: parsing
 * @flags: BDRV_O_* flags; only BDRV_O_PROTOCOL is inspected here
 * @drv: explicitly requested driver, or NULL to resolve from the options
 *
 * Returns 0 on success, negative errno with @errp set on failure.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* remember to run the driver's filename parser later */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        /* An explicit driver argument and a "driver" option must not clash */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                /* Infer the protocol driver from the filename prefix */
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* At the protocol level a driver must have been resolved by now */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
1236
/*
 * Attach (or detach, when @backing_hd is NULL) a backing file to @bs.
 *
 * Manages the op blocker on the backing node: the old backing node (if any)
 * is unblocked first; a new blocker is created when @bs gains its first
 * backing node, and freed again when the backing node is removed.
 * Also keeps bs->backing_file / bs->backing_format in sync and refreshes
 * the I/O limits at the end.
 *
 * NOTE(review): ownership of the @backing_hd reference appears to pass to
 * @bs here (no bdrv_ref taken) — confirm against callers.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        /* Detaching: lift all blockers we placed on the old backing node */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        /* First backing node: create the blocker error used as the handle */
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}
1267
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 *
 * Returns 0 on success (including the no-op cases: backing file already
 * open, or no backing file configured), negative errno with @errp set on
 * failure. On failure BDRV_O_NO_BACKING is set on bs->open_flags.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    /* Already open: nothing to do, but we still own the options reference */
    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* filename comes from the options dict, not from bs->backing_file */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* No backing file configured at all: success, nothing to open */
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
                                       &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            QDECREF(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    /* Default the backing driver to the recorded format, unless overridden */
    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
    }

    assert(bs->backing_hd == NULL);
    /* bdrv_open() consumes the options reference, even on failure */
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), NULL, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}
1342
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001343/*
Max Reitzda557aa2013-12-20 19:28:11 +01001344 * Opens a disk image whose options are given as BlockdevRef in another block
1345 * device's options.
1346 *
Max Reitzda557aa2013-12-20 19:28:11 +01001347 * If allow_none is true, no image will be opened if filename is false and no
1348 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1349 *
1350 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1351 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1352 * itself, all options starting with "${bdref_key}." are considered part of the
1353 * BlockdevRef.
1354 *
1355 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001356 *
1357 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001358 */
1359int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1360 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001361 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001362{
1363 QDict *image_options;
1364 int ret;
1365 char *bdref_key_dot;
1366 const char *reference;
1367
Max Reitzf67503e2014-02-18 18:33:05 +01001368 assert(pbs);
1369 assert(*pbs == NULL);
1370
Max Reitzda557aa2013-12-20 19:28:11 +01001371 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1372 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1373 g_free(bdref_key_dot);
1374
1375 reference = qdict_get_try_str(options, bdref_key);
1376 if (!filename && !reference && !qdict_size(image_options)) {
1377 if (allow_none) {
1378 ret = 0;
1379 } else {
1380 error_setg(errp, "A block device must be specified for \"%s\"",
1381 bdref_key);
1382 ret = -EINVAL;
1383 }
Markus Armbrusterb20e61e2014-05-28 11:16:57 +02001384 QDECREF(image_options);
Max Reitzda557aa2013-12-20 19:28:11 +01001385 goto done;
1386 }
1387
Max Reitzf7d9fd82014-02-18 18:33:12 +01001388 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001389
1390done:
1391 qdict_del(options, bdref_key);
1392 return ret;
1393}
1394
Chen Gang6b8aeca2014-06-23 23:28:23 +08001395int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001396{
1397 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001398 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001399 int64_t total_size;
Chunyan Liu83d05212014-06-05 17:20:51 +08001400 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001401 QDict *snapshot_options;
1402 BlockDriverState *bs_snapshot;
1403 Error *local_err;
1404 int ret;
1405
1406 /* if snapshot, we create a temporary backing file and open it
1407 instead of opening 'filename' directly */
1408
1409 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001410 total_size = bdrv_getlength(bs);
1411 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001412 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001413 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001414 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001415 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001416
1417 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001418 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001419 if (ret < 0) {
1420 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001421 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001422 }
1423
Max Reitzef810432014-12-02 18:32:42 +01001424 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001425 &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01001426 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
Max Reitzef810432014-12-02 18:32:42 +01001427 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001428 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001429 if (ret < 0) {
1430 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1431 "'%s': %s", tmp_filename,
1432 error_get_pretty(local_err));
1433 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001434 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001435 }
1436
1437 /* Prepare a new options QDict for the temporary file */
1438 snapshot_options = qdict_new();
1439 qdict_put(snapshot_options, "file.driver",
1440 qstring_from_str("file"));
1441 qdict_put(snapshot_options, "file.filename",
1442 qstring_from_str(tmp_filename));
1443
Markus Armbrustere4e99862014-10-07 13:59:03 +02001444 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001445
1446 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Max Reitzef810432014-12-02 18:32:42 +01001447 flags, &bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001448 if (ret < 0) {
1449 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001450 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001451 }
1452
1453 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001454
1455out:
1456 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001457 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001458}
1459
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
 *
 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
 * If it is not NULL, the referenced BDS will be reused.
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given,
 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
 *
 * Returns 0 on success, a negative errno value on failure.  On failure, errp
 * is set and *pbs is left untouched (a BDS newly created here is freed again).
 */
int bdrv_open(BlockDriverState **pbs, const char *filename,
              const char *reference, QDict *options, int flags,
              BlockDriver *drv, Error **errp)
{
    int ret;
    BlockDriverState *file = NULL, *bs;
    const char *drvname;
    Error *local_err = NULL;
    int snapshot_flags = 0;

    assert(pbs);

    /* Referencing an existing node: no new BDS is opened at all; just take
     * a reference on the looked-up node and hand it back. */
    if (reference) {
        bool options_non_empty = options ? qdict_size(options) : false;
        QDECREF(options);

        if (*pbs) {
            error_setg(errp, "Cannot reuse an existing BDS when referencing "
                       "another block device");
            return -EINVAL;
        }

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

    if (*pbs) {
        bs = *pbs;
    } else {
        bs = bdrv_new();
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
    if (local_err) {
        goto fail;
    }

    /* Find the right image format driver */
    drv = NULL;
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        qdict_del(options, "driver");
        if (!drv) {
            error_setg(errp, "Unknown driver: '%s'", drvname);
            ret = -EINVAL;
            goto fail;
        }
    }

    assert(drvname || !(flags & BDRV_O_PROTOCOL));
    if (drv && !drv->bdrv_file_open) {
        /* If the user explicitly wants a format driver here, we'll need to add
         * another layer for the protocol in bs->file */
        flags &= ~BDRV_O_PROTOCOL;
    }

    /* bs->options keeps the caller-visible option set; the shallow clone is
     * consumed below (options are deleted from it as they are processed). */
    bs->options = options;
    options = qdict_clone_shallow(options);

    /* Open image file without format layer */
    if ((flags & BDRV_O_PROTOCOL) == 0) {
        if (flags & BDRV_O_RDWR) {
            flags |= BDRV_O_ALLOW_RDWR;
        }
        if (flags & BDRV_O_SNAPSHOT) {
            /* Remember the snapshot flags for bdrv_append_temp_snapshot()
             * below and open the image itself with backing-file flags. */
            snapshot_flags = bdrv_temp_snapshot_flags(flags);
            flags = bdrv_backing_flags(flags);
        }

        assert(file == NULL);
        ret = bdrv_open_image(&file, filename, options, "file",
                              bdrv_inherited_flags(flags),
                              true, &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Image format probing */
    bs->probed = !drv;
    if (!drv && file) {
        ret = find_image_format(file, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        ret = -EINVAL;
        goto fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* bdrv_open_common() may have taken its own reference to 'file'
     * (bs->file); drop ours if it ended up elsewhere. */
    if (file && (bs->file != file)) {
        bdrv_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        QDict *backing_options;

        qdict_extract_subqdict(options, &backing_options, "backing.");
        ret = bdrv_open_backing_file(bs, backing_options, &local_err);
        if (ret < 0) {
            goto close_and_fail;
        }
    }

    bdrv_refresh_filename(bs);

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
        if (local_err) {
            goto close_and_fail;
        }
    }

    /* Check if any unknown options were used */
    if (options && (qdict_size(options) != 0)) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp, "Block format '%s' used by device '%s' doesn't "
                       "support the option '%s'", drv->format_name,
                       bdrv_get_device_name(bs), entry->key);
        }

        ret = -EINVAL;
        goto close_and_fail;
    }

    if (!bdrv_key_required(bs)) {
        if (bs->blk) {
            blk_dev_change_media_cb(bs->blk, true);
        }
    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
               && !runstate_check(RUN_STATE_INMIGRATE)
               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
        error_setg(errp,
                   "Guest must be stopped for opening of encrypted image");
        ret = -EBUSY;
        goto close_and_fail;
    }

    QDECREF(options);
    *pbs = bs;
    return 0;

fail:
    /* Error before the image was fully opened: just drop references */
    if (file != NULL) {
        bdrv_unref(file);
    }
    QDECREF(bs->options);
    QDECREF(options);
    bs->options = NULL;
    if (!*pbs) {
        /* If *pbs is NULL, a new BDS has been created in this function and
           needs to be freed now. Otherwise, it does not need to be closed,
           since it has not really been opened yet. */
        bdrv_unref(bs);
    }
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;

close_and_fail:
    /* See fail path, but now the BDS has to be always closed */
    if (*pbs) {
        bdrv_close(bs);
    } else {
        bdrv_unref(bs);
    }
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}
1680
/* One element of a BlockReopenQueue: the staged reopen state for a single
 * BlockDriverState taking part in an atomic multi-device reopen. */
typedef struct BlockReopenQueueEntry {
    bool prepared;          /* set once bdrv_reopen_prepare() succeeded, so
                             * the cleanup path knows to call abort() on it */
    BDRVReopenState state;  /* staged reopen parameters for this BDS */
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;  /* link in the queue */
} BlockReopenQueueEntry;
1686
1687/*
1688 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1689 * reopen of multiple devices.
1690 *
1691 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1692 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1693 * be created and initialized. This newly created BlockReopenQueue should be
1694 * passed back in for subsequent calls that are intended to be of the same
1695 * atomic 'set'.
1696 *
1697 * bs is the BlockDriverState to add to the reopen queue.
1698 *
1699 * flags contains the open flags for the associated bs
1700 *
1701 * returns a pointer to bs_queue, which is either the newly allocated
1702 * bs_queue, or the existing bs_queue being used.
1703 *
1704 */
1705BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1706 BlockDriverState *bs, int flags)
1707{
1708 assert(bs != NULL);
1709
1710 BlockReopenQueueEntry *bs_entry;
1711 if (bs_queue == NULL) {
1712 bs_queue = g_new0(BlockReopenQueue, 1);
1713 QSIMPLEQ_INIT(bs_queue);
1714 }
1715
Kevin Wolff1f25a22014-04-25 19:04:55 +02001716 /* bdrv_open() masks this flag out */
1717 flags &= ~BDRV_O_PROTOCOL;
1718
Jeff Codye971aa12012-09-20 15:13:19 -04001719 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001720 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001721 }
1722
1723 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1724 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1725
1726 bs_entry->state.bs = bs;
1727 bs_entry->state.flags = flags;
1728
1729 return bs_queue;
1730}
1731
1732/*
1733 * Reopen multiple BlockDriverStates atomically & transactionally.
1734 *
1735 * The queue passed in (bs_queue) must have been built up previous
1736 * via bdrv_reopen_queue().
1737 *
1738 * Reopens all BDS specified in the queue, with the appropriate
1739 * flags. All devices are prepared for reopen, and failure of any
1740 * device will cause all device changes to be abandonded, and intermediate
1741 * data cleaned up.
1742 *
1743 * If all devices prepare successfully, then the changes are committed
1744 * to all devices.
1745 *
1746 */
1747int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1748{
1749 int ret = -1;
1750 BlockReopenQueueEntry *bs_entry, *next;
1751 Error *local_err = NULL;
1752
1753 assert(bs_queue != NULL);
1754
1755 bdrv_drain_all();
1756
1757 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1758 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1759 error_propagate(errp, local_err);
1760 goto cleanup;
1761 }
1762 bs_entry->prepared = true;
1763 }
1764
1765 /* If we reach this point, we have success and just need to apply the
1766 * changes
1767 */
1768 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1769 bdrv_reopen_commit(&bs_entry->state);
1770 }
1771
1772 ret = 0;
1773
1774cleanup:
1775 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1776 if (ret && bs_entry->prepared) {
1777 bdrv_reopen_abort(&bs_entry->state);
1778 }
1779 g_free(bs_entry);
1780 }
1781 g_free(bs_queue);
1782 return ret;
1783}
1784
1785
1786/* Reopen a single BlockDriverState with the specified flags. */
1787int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1788{
1789 int ret = -1;
1790 Error *local_err = NULL;
1791 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1792
1793 ret = bdrv_reopen_multiple(queue, &local_err);
1794 if (local_err != NULL) {
1795 error_propagate(errp, local_err);
1796 }
1797 return ret;
1798}
1799
1800
/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * bs is the BlockDriverState to reopen
 * flags are the new open flags
 * queue is the reopen queue
 *
 * Returns 0 on success, non-zero on error. On error errp will be set
 * as well.
 *
 * NOTE(review): the generic failure paths below return -1 rather than a
 * specific -errno; callers only test for non-zero.
 *
 * On failure, bdrv_reopen_abort() will be called to clean up any data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockDriver *drv;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* if we are to stay read-only, do not allow permission change
     * to r/w */
    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
        reopen_state->flags & BDRV_O_RDWR) {
        error_setg(errp, "Node '%s' is read only",
                   bdrv_get_device_or_node_name(reopen_state->bs));
        goto error;
    }


    /* Flush pending data before the driver is asked to switch over */
    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
                  strerror(-ret));
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            /* Prefer the driver's own error message if it provided one */
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_setg(errp, "Block format '%s' used by node '%s' "
                   "does not support reopening files", drv->format_name,
                   bdrv_get_device_or_node_name(reopen_state->bs));
        ret = -1;
        goto error;
    }

    ret = 0;

error:
    return ret;
}
1872
/*
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
 * makes them final by swapping the staging BlockDriverState contents into
 * the active BlockDriverState contents.
 */
void bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    /* If there are any driver level actions to take */
    if (drv->bdrv_reopen_commit) {
        drv->bdrv_reopen_commit(reopen_state);
    }

    /* set BDS specific flags now */
    reopen_state->bs->open_flags = reopen_state->flags;
    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
                                              BDRV_O_CACHE_WB);
    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);

    /* The new flags may change the request alignment/limits */
    bdrv_refresh_limits(reopen_state->bs, NULL);
}
1899
/*
 * Abort the reopen, and delete and free the staged changes in
 * reopen_state
 */
void bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    /* Let the driver discard whatever it staged in prepare(); drivers
     * without an abort handler have nothing to undo. */
    if (drv->bdrv_reopen_abort) {
        drv->bdrv_reopen_abort(reopen_state);
    }
}
1916
1917
/* Tear down an open BlockDriverState: cancel its job, drain and flush all
 * I/O, detach the backing chain, call the driver's close handler and reset
 * the BDS fields back to their "not opened" state.  The BDS object itself
 * stays allocated (freeing is the job of bdrv_delete()/bdrv_unref()). */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        if (bs->backing_hd) {
            /* Detach first, then drop our reference to the backing file */
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        /* Reset per-open state so the BDS can be reopened cleanly */
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;

        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    if (bs->blk) {
        blk_dev_change_media_cb(bs->blk, false);
    }

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }

    /* Free the AIO-context-change notifiers registered on this BDS */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}
1973
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001974void bdrv_close_all(void)
1975{
1976 BlockDriverState *bs;
1977
Benoît Canetdc364f42014-01-23 21:31:32 +01001978 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001979 AioContext *aio_context = bdrv_get_aio_context(bs);
1980
1981 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001982 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001983 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001984 }
1985}
1986
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001987/* Check if any requests are in-flight (including throttled requests) */
1988static bool bdrv_requests_pending(BlockDriverState *bs)
1989{
1990 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1991 return true;
1992 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001993 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1994 return true;
1995 }
1996 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001997 return true;
1998 }
1999 if (bs->file && bdrv_requests_pending(bs->file)) {
2000 return true;
2001 }
2002 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
2003 return true;
2004 }
2005 return false;
2006}
2007
/* Run one iteration of draining for a single BDS: flush the queued I/O,
 * restart throttled requests, then poll its AioContext (blocking only if
 * requests are still pending).  Returns true if requests were pending or
 * the poll made progress, i.e. the caller should iterate again. */
static bool bdrv_drain_one(BlockDriverState *bs)
{
    bool bs_busy;

    bdrv_flush_io_queue(bs);
    bdrv_start_throttled_reqs(bs);
    bs_busy = bdrv_requests_pending(bs);
    /* aio_poll() blocks only when bs_busy is true; its return value
     * (progress was made) also counts as "still busy" */
    bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
    return bs_busy;
}
2018
2019/*
2020 * Wait for pending requests to complete on a single BlockDriverState subtree
2021 *
2022 * See the warning in bdrv_drain_all(). This function can only be called if
2023 * you are sure nothing can generate I/O because you have op blockers
2024 * installed.
2025 *
2026 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
2027 * AioContext.
2028 */
2029void bdrv_drain(BlockDriverState *bs)
2030{
2031 while (bdrv_drain_one(bs)) {
2032 /* Keep iterating */
2033 }
2034}
2035
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete. Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    /* Always run first iteration so any pending completion BHs run */
    bool busy = true;
    BlockDriverState *bs;

    /* Phase 1: pause all block jobs so they stop generating new I/O */
    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        if (bs->job) {
            block_job_pause(bs->job);
        }
        aio_context_release(aio_context);
    }

    /* Phase 2: keep draining every BDS until a full pass finds all idle */
    while (busy) {
        busy = false;

        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
            AioContext *aio_context = bdrv_get_aio_context(bs);

            aio_context_acquire(aio_context);
            busy |= bdrv_drain_one(bs);
            aio_context_release(aio_context);
        }
    }

    /* Phase 3: resume the jobs paused in phase 1 */
    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        if (bs->job) {
            block_job_resume(bs->job);
        }
        aio_context_release(aio_context);
    }
}
2086
/* make a BlockDriverState anonymous by removing from bdrv_state and
 * graph_bdrv_state list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    /*
     * Take care to remove bs from bdrv_states only when it's actually
     * in it. Note that bs->device_list.tqe_prev is initially null,
     * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
     * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
     * resetting it to null on remove.
     */
    if (bs->device_list.tqe_prev) {
        QTAILQ_REMOVE(&bdrv_states, bs, device_list);
        bs->device_list.tqe_prev = NULL;
    }
    /* A non-empty node name means bs is on the graph node list as well */
    if (bs->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
    }
    bs->node_name[0] = '\0';
}
2108
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02002109static void bdrv_rebind(BlockDriverState *bs)
2110{
2111 if (bs->drv && bs->drv->bdrv_rebind) {
2112 bs->drv->bdrv_rebind(bs);
2113 }
2114}
2115
/* Copy from bs_src to bs_dest the fields that must stay attached to the
 * guest device across a bdrv_swap() rather than follow the image contents.
 * Used three times by bdrv_swap() to undo the effect of the struct swap on
 * exactly these fields. */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->guest_block_size   = bs_src->guest_block_size;
    bs_dest->copy_on_read       = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o throttled req */
    memcpy(&bs_dest->throttle_state,
           &bs_src->throttle_state,
           sizeof(ThrottleState));
    bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
    bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
    bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error      = bs_src->on_read_error;
    bs_dest->on_write_error     = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
    bs_dest->iostatus           = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;

    /* reference count */
    bs_dest->refcnt             = bs_src->refcnt;

    /* job */
    bs_dest->job                = bs_src->job;

    /* keep the same entry in bdrv_states */
    bs_dest->device_list = bs_src->device_list;
    bs_dest->blk = bs_src->blk;

    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
           sizeof(bs_dest->op_blockers));
}
2159
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be unattached and shouldn't have anything fancy enabled */
    assert(!bs_new->blk);
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* Swap the full structs, ... */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new must remain unattached */
    assert(!bs_new->blk);

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    /* Let the drivers fix up any pointers into the swapped structs */
    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
2222
Jeff Cody8802d1f2012-02-28 15:54:06 -05002223/*
2224 * Add new bs contents at the top of an image chain while the chain is
2225 * live, while keeping required fields on the top layer.
2226 *
2227 * This will modify the BlockDriverState fields, and swap contents
2228 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2229 *
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002230 * bs_new must not be attached to a BlockBackend.
Jeff Codyf6801b82012-03-27 16:30:19 -04002231 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05002232 * This function does not create any image files.
2233 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    /* After this swap, the pointer bs_top refers to the new top layer's
     * contents and bs_new carries what used to be the top (see bdrv_swap). */
    bdrv_swap(bs_new, bs_top);

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    bdrv_set_backing_hd(bs_top, bs_new);
}
2242
/* Free a BlockDriverState.  The caller must guarantee it is idle:
 * no job, no op blockers, zero refcount, and no dirty bitmaps. */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    g_free(bs);
}
2257
aliguorie97fc192009-04-21 23:11:50 +00002258/*
2259 * Run consistency checks on an image
2260 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002261 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002262 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002263 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002264 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002265int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002266{
Max Reitz908bcd52014-08-07 22:47:55 +02002267 if (bs->drv == NULL) {
2268 return -ENOMEDIUM;
2269 }
aliguorie97fc192009-04-21 23:11:50 +00002270 if (bs->drv->bdrv_check == NULL) {
2271 return -ENOTSUP;
2272 }
2273
Kevin Wolfe076f332010-06-29 11:43:13 +02002274 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002275 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002276}
2277
Kevin Wolf8a426612010-07-16 17:17:01 +02002278#define COMMIT_BUF_SECTORS 2048
2279
/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;

    if (!drv)
        return -ENOMEDIUM;

    /* Nothing to commit into without a backing file. */
    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    /* Refuse if another user (e.g. a block job) has blocked commit. */
    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
        return -EBUSY;
    }

    /* Remember the backing file's r/o state so it can be restored below. */
    ro = bs->backing_hd->read_only;
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* Temporarily reopen the backing file read-write for the copy. */
        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing_hd);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible. If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing_hd, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
     * bs->backing_hd as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    /* Copy every allocated chunk of the top image into the backing file. */
    for (sector = 0; sector < total_sectors; sector += n) {
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
            ret = bdrv_read(bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing_hd, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    /* Drop the now-redundant COW data from the top image, if supported. */
    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd) {
        bdrv_flush(bs->backing_hd);
    }

    ret = 0;
ro_cleanup:
    /* qemu_vfree(NULL) is safe, so this covers the early-error paths too. */
    qemu_vfree(buf);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}
2387
/* Commit every open device that has a backing file.
 * Returns 0 on success, or the first negative bdrv_commit() error. */
int bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        /* Each BDS may live in a different AioContext; take it around
         * the commit so we don't race with that context's I/O. */
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        if (bs->drv && bs->backing_hd) {
            int ret = bdrv_commit(bs);
            if (ret < 0) {
                aio_context_release(aio_context);
                return ret;
            }
        }
        aio_context_release(aio_context);
    }
    return 0;
}
2407
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002408/**
2409 * Remove an active request from the tracked requests list
2410 *
2411 * This function should be called when a tracked request is completing.
2412 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    /* A serialising request holds a reference in serialising_in_flight;
     * drop it now that the request is finishing. */
    if (req->serialising) {
        req->bs->serialising_in_flight--;
    }

    QLIST_REMOVE(req, list);
    /* Wake everyone who blocked on this request in wait_serialising_requests(). */
    qemu_co_queue_restart_all(&req->wait_queue);
}
2422
2423/**
2424 * Add an active request to the tracked requests list
2425 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  unsigned int bytes, bool is_write)
{
    /* The compound literal zero-initializes every field not listed here
     * (e.g. waiting_for, list linkage), so no stale state survives. */
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset = offset,
        .bytes = bytes,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
        .serialising = false,
        .overlap_offset = offset,
        .overlap_bytes = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}
2446
/* Mark a tracked request as serialising and widen its overlap window to
 * 'align'-aligned boundaries.  'align' must be a power of two. */
static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
{
    /* Round the request region outward to alignment boundaries. */
    int64_t overlap_offset = req->offset & ~(align - 1);
    unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
                               - overlap_offset;

    /* Count each request at most once, even if marked repeatedly. */
    if (!req->serialising) {
        req->bs->serialising_in_flight++;
        req->serialising = true;
    }

    /* Only ever grow the overlap window, never shrink it. */
    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}
2461
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002462/**
2463 * Round a region to cluster boundaries
2464 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002465void bdrv_round_to_clusters(BlockDriverState *bs,
2466 int64_t sector_num, int nb_sectors,
2467 int64_t *cluster_sector_num,
2468 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002469{
2470 BlockDriverInfo bdi;
2471
2472 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2473 *cluster_sector_num = sector_num;
2474 *cluster_nb_sectors = nb_sectors;
2475 } else {
2476 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2477 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2478 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2479 nb_sectors, c);
2480 }
2481}
2482
Kevin Wolf73271452013-12-04 17:08:50 +01002483static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002484{
2485 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002486 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002487
Kevin Wolf73271452013-12-04 17:08:50 +01002488 ret = bdrv_get_info(bs, &bdi);
2489 if (ret < 0 || bdi.cluster_size == 0) {
2490 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002491 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002492 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002493 }
2494}
2495
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002496static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002497 int64_t offset, unsigned int bytes)
2498{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002499 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002500 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002501 return false;
2502 }
2503 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002504 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002505 return false;
2506 }
2507 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002508}
2509
/* Block the calling coroutine until no conflicting serialising request
 * overlaps 'self'.  Returns true if we actually had to wait. */
static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
    bool retry;
    bool waited = false;

    /* Fast path: nothing serialising in flight, nothing to wait for. */
    if (!bs->serialising_in_flight) {
        return false;
    }

    /* Restart the scan from the top after every wait: the request list
     * may have changed while we were asleep. */
    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
            }
            if (tracked_request_overlaps(req, self->overlap_offset,
                                         self->overlap_bytes))
            {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
                    qemu_co_queue_wait(&req->wait_queue);
                    self->waiting_for = NULL;
                    retry = true;
                    waited = true;
                    break;
                }
            }
        }
    } while (retry);

    return waited;
}
2553
Kevin Wolf756e6732010-01-12 12:55:17 +01002554/*
2555 * Return values:
2556 * 0 - success
2557 * -EINVAL - backing format specified, but no file
2558 * -ENOSPC - can't update the backing file because no space is left in the
2559 * image file header
2560 * -ENOTSUP - format driver doesn't support changing the backing file
2561 */
2562int bdrv_change_backing_file(BlockDriverState *bs,
2563 const char *backing_file, const char *backing_fmt)
2564{
2565 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002566 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002567
Paolo Bonzini5f377792012-04-12 14:01:01 +02002568 /* Backing file format doesn't make sense without a backing file */
2569 if (backing_fmt && !backing_file) {
2570 return -EINVAL;
2571 }
2572
Kevin Wolf756e6732010-01-12 12:55:17 +01002573 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002574 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002575 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002576 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002577 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002578
2579 if (ret == 0) {
2580 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2581 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2582 }
2583 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002584}
2585
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002586/*
2587 * Finds the image layer in the chain that has 'bs' as its backing file.
2588 *
2589 * active is the current topmost image.
2590 *
2591 * Returns NULL if bs is not found in active's image chain,
2592 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002593 *
2594 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002595 */
2596BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2597 BlockDriverState *bs)
2598{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002599 while (active && bs != active->backing_hd) {
2600 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002601 }
2602
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002603 return active;
2604}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002605
/* Given a BDS, searches for the base layer. */
BlockDriverState *bdrv_find_base(BlockDriverState *bs)
{
    /* bdrv_find_overlay(bs, NULL) walks to the bottommost image. */
    return bdrv_find_overlay(bs, NULL);
}
2611
/* Queue element used by bdrv_drop_intermediate() to remember the
 * intermediate BlockDriverStates scheduled for deletion. */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
} BlkIntermediateStates;
2616
2617
2618/*
2619 * Drops images above 'base' up to and including 'top', and sets the image
2620 * above 'top' to have base as its backing file.
2621 *
2622 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2623 * information in 'bs' can be properly updated.
2624 *
2625 * E.g., this will convert the following chain:
2626 * bottom <- base <- intermediate <- top <- active
2627 *
2628 * to
2629 *
2630 * bottom <- base <- active
2631 *
2632 * It is allowed for bottom==base, in which case it converts:
2633 *
2634 * base <- intermediate <- top <- active
2635 *
2636 * to
2637 *
2638 * base <- active
2639 *
Jeff Cody54e26902014-06-25 15:40:10 -04002640 * If backing_file_str is non-NULL, it will be used when modifying top's
2641 * overlay image metadata.
2642 *
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002643 * Error conditions:
2644 * if active == top, that is considered an error
2645 *
2646 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base, const char *backing_file_str)
{
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
    BlkIntermediateStates *intermediate_state, *next;
    int ret = -EIO;

    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
    QSIMPLEQ_INIT(&states_to_delete);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    new_top_bs = bdrv_find_overlay(active, top);

    if (new_top_bs == NULL) {
        /* we could not find the image above 'top', this is an error */
        goto exit;
    }

    /* special case of new_top_bs->backing_hd already pointing to base - nothing
     * to do, no intermediate images */
    if (new_top_bs->backing_hd == base) {
        ret = 0;
        goto exit;
    }

    intermediate = top;

    /* now we will go down through the list, and add each BDS we find
     * into our deletion queue, until we hit the 'base'
     */
    while (intermediate) {
        intermediate_state = g_new0(BlkIntermediateStates, 1);
        intermediate_state->bs = intermediate;
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);

        if (intermediate->backing_hd == base) {
            base_bs = intermediate->backing_hd;
            break;
        }
        intermediate = intermediate->backing_hd;
    }
    if (base_bs == NULL) {
        /* something went wrong, we did not end at the base. safely
         * unravel everything, and exit with error */
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
    /* NULL backing_file_str means "reuse the base image's filename". */
    backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        goto exit;
    }
    bdrv_set_backing_hd(new_top_bs, base_bs);

    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        /* so that bdrv_close() does not recursively close the chain */
        bdrv_set_backing_hd(intermediate_state->bs, NULL);
        bdrv_unref(intermediate_state->bs);
    }
    ret = 0;

exit:
    /* Free the bookkeeping nodes on both success and error paths. */
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        g_free(intermediate_state);
    }
    return ret;
}
2721
2722
aliguori71d07702009-03-03 17:37:16 +00002723static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2724 size_t size)
2725{
Peter Lieven75af1f32015-02-06 11:54:11 +01002726 if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002727 return -EIO;
2728 }
2729
Max Reitzc0191e72015-02-05 13:58:24 -05002730 if (!bdrv_is_inserted(bs)) {
aliguori71d07702009-03-03 17:37:16 +00002731 return -ENOMEDIUM;
Max Reitzc0191e72015-02-05 13:58:24 -05002732 }
aliguori71d07702009-03-03 17:37:16 +00002733
Max Reitzc0191e72015-02-05 13:58:24 -05002734 if (offset < 0) {
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002735 return -EIO;
Max Reitzc0191e72015-02-05 13:58:24 -05002736 }
aliguori71d07702009-03-03 17:37:16 +00002737
2738 return 0;
2739}
2740
2741static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2742 int nb_sectors)
2743{
Peter Lieven75af1f32015-02-06 11:54:11 +01002744 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002745 return -EIO;
2746 }
2747
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002748 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2749 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002750}
2751
/* State shared between a synchronous read/write caller and the
 * coroutine entry point bdrv_rw_co_entry(). */
typedef struct RwCo {
    BlockDriverState *bs;    /* device to operate on */
    int64_t offset;          /* byte offset of the request */
    QEMUIOVector *qiov;      /* data buffers; qiov->size is the length */
    bool is_write;           /* write if true, read if false */
    int ret;                 /* completion status; NOT_DONE while pending */
    BdrvRequestFlags flags;  /* passed through to the request */
} RwCo;
2760
2761static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2762{
2763 RwCo *rwco = opaque;
2764
2765 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002766 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2767 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002768 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002769 } else {
2770 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2771 rwco->qiov->size, rwco->qiov,
2772 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002773 }
2774}
2775
2776/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002777 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002778 */
static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
        .flags = flags,
    };

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        /* Spawn the coroutine and poll the BDS's AioContext until it
         * has stored a result in rwco.ret. */
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }
    return rwco.ret;
}
2818
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002819/*
2820 * Process a synchronous request using coroutines
2821 */
2822static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002823 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002824{
2825 QEMUIOVector qiov;
2826 struct iovec iov = {
2827 .iov_base = (void *)buf,
2828 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2829 };
2830
Peter Lieven75af1f32015-02-06 11:54:11 +01002831 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolfda15ee52014-04-14 15:39:36 +02002832 return -EINVAL;
2833 }
2834
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002835 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002836 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2837 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002838}
2839
bellard19cb3732006-08-19 11:45:59 +00002840/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    /* Synchronous read: thin wrapper around the common rw helper. */
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}
2846
Markus Armbruster07d27a42012-06-29 17:34:29 +02002847/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2848int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2849 uint8_t *buf, int nb_sectors)
2850{
2851 bool enabled;
2852 int ret;
2853
2854 enabled = bs->io_limits_enabled;
2855 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002856 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002857 bs->io_limits_enabled = enabled;
2858 return ret;
2859}
2860
ths5fafdf22007-09-16 21:08:06 +00002861/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002862 -EIO generic I/O error (may happen for all errors)
2863 -ENOMEDIUM No media inserted.
2864 -EINVAL Invalid sector number or nb_sectors
2865 -EACCES Trying to write a read-only device
2866*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* Synchronous write; the cast only drops const for the shared helper. */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
2872
/* Synchronously write zeroes; a NULL buffer plus BDRV_REQ_ZERO_WRITE
 * tells the request path to generate zeroes instead of copying data. */
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
                      int nb_sectors, BdrvRequestFlags flags)
{
    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
                      BDRV_REQ_ZERO_WRITE | flags);
}
2879
Peter Lievend75cbb52013-10-24 12:07:03 +02002880/*
2881 * Completely zero out a block device with the help of bdrv_write_zeroes.
2882 * The operation is sped up by checking the block status and only writing
2883 * zeroes to the device if they currently do not return zeroes. Optional
2884 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2885 *
2886 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2887 */
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
    int64_t target_sectors, ret, nb_sectors, sector_num = 0;
    int n;

    target_sectors = bdrv_nb_sectors(bs);
    if (target_sectors < 0) {
        return target_sectors;
    }

    /* Walk the whole device; each iteration advances by the length 'n'
     * of the region bdrv_get_block_status() reported on. */
    for (;;) {
        nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
        if (nb_sectors <= 0) {
            return 0;
        }
        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
        if (ret < 0) {
            error_report("error getting block status at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        /* Regions that already read as zeroes need no work. */
        if (ret & BDRV_BLOCK_ZERO) {
            sector_num += n;
            continue;
        }
        ret = bdrv_write_zeroes(bs, sector_num, n, flags);
        if (ret < 0) {
            error_report("error writing zeroes at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        sector_num += n;
    }
}
2922
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002923int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002924{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002925 QEMUIOVector qiov;
2926 struct iovec iov = {
2927 .iov_base = (void *)buf,
2928 .iov_len = bytes,
2929 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002930 int ret;
bellard83f64092006-08-01 16:21:11 +00002931
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002932 if (bytes < 0) {
2933 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002934 }
2935
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002936 qemu_iovec_init_external(&qiov, &iov, 1);
2937 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2938 if (ret < 0) {
2939 return ret;
bellard83f64092006-08-01 16:21:11 +00002940 }
2941
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002942 return bytes;
bellard83f64092006-08-01 16:21:11 +00002943}
2944
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002945int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002946{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002947 int ret;
bellard83f64092006-08-01 16:21:11 +00002948
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002949 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2950 if (ret < 0) {
2951 return ret;
bellard83f64092006-08-01 16:21:11 +00002952 }
2953
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002954 return qiov->size;
2955}
2956
2957int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002958 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002959{
2960 QEMUIOVector qiov;
2961 struct iovec iov = {
2962 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002963 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002964 };
2965
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002966 if (bytes < 0) {
2967 return -EINVAL;
2968 }
2969
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002970 qemu_iovec_init_external(&qiov, &iov, 1);
2971 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002972}
bellard83f64092006-08-01 16:21:11 +00002973
Kevin Wolff08145f2010-06-16 16:38:15 +02002974/*
2975 * Writes to the file and ensures that no writes are reordered across this
2976 * request (acts as a barrier)
2977 *
2978 * Returns 0 on success, -errno in error cases.
2979 */
2980int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2981 const void *buf, int count)
2982{
2983 int ret;
2984
2985 ret = bdrv_pwrite(bs, offset, buf, count);
2986 if (ret < 0) {
2987 return ret;
2988 }
2989
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002990 /* No flush needed for cache modes that already do it */
2991 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002992 bdrv_flush(bs);
2993 }
2994
2995 return 0;
2996}
2997
/*
 * Copy-on-read helper: read the whole cluster range covering
 * [sector_num, sector_num + nb_sectors) through a bounce buffer, write it
 * back into the image (using an efficient zero write when the cluster reads
 * back as all zeroes), then copy the requested subrange into @qiov.
 *
 * Returns 0 on success, -errno on failure (-ENOMEM if the bounce buffer
 * cannot be allocated).
 */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
                           &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
    if (bounce_buffer == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    /* Read the full cluster from the underlying driver */
    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        /* All-zero cluster: use the driver's efficient zero write instead of
         * writing the bounce buffer back */
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors, 0);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* Copy only the originally requested subrange out of the cluster */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
                        nb_sectors * BDRV_SECTOR_SIZE);

err:
    /* qemu_vfree(NULL) is safe, so a failed allocation also lands here */
    qemu_vfree(bounce_buffer);
    return ret;
}
3068
/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read and zeroing after EOF; any other features must be
 * implemented by the caller.
 *
 * @offset and @bytes must be multiples of BDRV_SECTOR_SIZE (asserted below);
 * @align is used to round up driver reads that straddle end-of-file.
 */
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    wait_serialising_requests(req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        /* Only go through the CoR path if part of the range is unallocated */
        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    /* Forward the request to the BlockDriver */
    if (!bs->zero_beyond_eof) {
        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
    } else {
        /* Read zeros after EOF */
        int64_t total_sectors, max_nb_sectors;

        total_sectors = bdrv_nb_sectors(bs);
        if (total_sectors < 0) {
            ret = total_sectors;
            goto out;
        }

        /* Sectors the driver can serve, rounded up to the request alignment */
        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                  align >> BDRV_SECTOR_BITS);
        if (nb_sectors < max_nb_sectors) {
            /* Entirely before EOF: plain driver read */
            ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
        } else if (max_nb_sectors > 0) {
            /* Straddles EOF: read only the in-file prefix into a shortened
             * copy of @qiov */
            QEMUIOVector local_qiov;

            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, 0,
                              max_nb_sectors * BDRV_SECTOR_SIZE);

            ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
                                     &local_qiov);

            qemu_iovec_destroy(&local_qiov);
        } else {
            /* Entirely beyond EOF: nothing to read from the driver */
            ret = 0;
        }

        /* Reading beyond end of file is supposed to produce zeroes */
        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
            uint64_t offset = MAX(0, total_sectors - sector_num);
            uint64_t bytes = (sector_num + nb_sectors - offset) *
                              BDRV_SECTOR_SIZE;
            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
        }
    }

out:
    return ret;
}
3158
Fam Zhengfc3959e2015-03-24 09:23:49 +08003159static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3160{
3161 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3162 return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3163}
3164
3165static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3166 int64_t offset, size_t bytes)
3167{
3168 int64_t align = bdrv_get_align(bs);
3169 return !(offset & (align - 1) || (bytes & (align - 1)));
3170}
3171
/*
 * Handle a read request in coroutine context.
 *
 * Validates the request, applies I/O throttling and copy-on-read, pads the
 * request to the device alignment with bounce buffers when needed, and
 * tracks it for request serialisation before forwarding to
 * bdrv_aligned_preadv().
 */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    uint64_t align = bdrv_get_align(bs);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, false);
    }

    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        /* Prepend a head bounce buffer covering the unaligned start */
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* Append a tail bounce buffer covering the unaligned end */
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    tracked_request_begin(&req, bs, offset, bytes, false);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}
3246
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003247static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3248 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3249 BdrvRequestFlags flags)
3250{
Peter Lieven75af1f32015-02-06 11:54:11 +01003251 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003252 return -EINVAL;
3253 }
3254
3255 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3256 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3257}
3258
/* Public coroutine read entry point: plain read, no extra flags. */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}

/* Public coroutine read entry point that forces copy-on-read for the
 * requested range. */
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_COPY_ON_READ);
}
3275
#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768

/*
 * Write zeroes to [sector_num, sector_num + nb_sectors).
 *
 * Tries the driver's efficient .bdrv_co_write_zeroes callback first; if the
 * driver lacks it or returns -ENOTSUP, falls back to writing an explicitly
 * zeroed bounce buffer via .bdrv_co_writev.  The loop splits the request to
 * honour the driver's write_zeroes alignment and per-request size limits.
 *
 * Returns 0 on success, -errno on failure.
 */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov = {0};
    int ret = 0;

    /* A zero limit means "no driver limit"; MIN_NON_ZERO ignores it */
    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
                                        BDRV_REQUEST_MAX_SECTORS);

    while (nb_sectors > 0 && !ret) {
        int num = nb_sectors;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned.
         */
        if (bs->bl.write_zeroes_alignment
            && num > bs->bl.write_zeroes_alignment) {
            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
                /* Make a small request up to the first aligned sector. */
                num = bs->bl.write_zeroes_alignment;
                num -= sector_num % bs->bl.write_zeroes_alignment;
            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
                /* Shorten the request to the last aligned sector.  num cannot
                 * underflow because num > bs->bl.write_zeroes_alignment.
                 */
                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
            }
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_write_zeroes) {
            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
                                            MAX_WRITE_ZEROES_BOUNCE_BUFFER);
            num = MIN(num, max_xfer_len);
            iov.iov_len = num * BDRV_SECTOR_SIZE;
            if (iov.iov_base == NULL) {
                /* Allocate lazily and zero once; reused across iterations */
                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
                if (iov.iov_base == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);

            /* Keep bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_xfer_len) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        sector_num += num;
        nb_sectors -= num;
    }

fail:
    /* qemu_vfree(NULL) is a no-op, so this is safe on every path */
    qemu_vfree(iov.iov_base);
    return ret;
}
3355
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003356/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003357 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003358 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003359static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003360 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3361 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003362{
3363 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003364 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003365 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003366
Kevin Wolfb404f722013-12-03 14:02:23 +01003367 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3368 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003369
Kevin Wolfb404f722013-12-03 14:02:23 +01003370 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3371 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003372 assert(!qiov || bytes == qiov->size);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003373
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003374 waited = wait_serialising_requests(req);
3375 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003376 assert(req->overlap_offset <= offset);
3377 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003378
Kevin Wolf65afd212013-12-03 14:55:55 +01003379 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003380
Peter Lieven465bee12014-05-18 00:58:19 +02003381 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3382 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3383 qemu_iovec_is_zero(qiov)) {
3384 flags |= BDRV_REQ_ZERO_WRITE;
3385 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3386 flags |= BDRV_REQ_MAY_UNMAP;
3387 }
3388 }
3389
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003390 if (ret < 0) {
3391 /* Do nothing, write notifier decided to fail this request */
3392 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003393 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003394 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003395 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003396 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003397 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3398 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003399 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003400
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003401 if (ret == 0 && !bs->enable_write_cache) {
3402 ret = bdrv_co_flush(bs);
3403 }
3404
Fam Zhenge4654d22013-11-13 18:29:43 +08003405 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003406
Benoît Canet5366d0c2014-09-05 15:46:18 +02003407 block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
Benoît Canet5e5a94b2014-09-05 15:46:16 +02003408
Max Reitzc0191e72015-02-05 13:58:24 -05003409 if (ret >= 0) {
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003410 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3411 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003412
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003413 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003414}
3415
/*
 * Handle a write request in coroutine context.
 *
 * Validates the request, applies I/O throttling, and pads an unaligned
 * request to the device alignment by reading the surrounding head/tail
 * areas first (read-modify-write), then forwards the aligned request to
 * bdrv_aligned_pwritev().
 */
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BdrvTrackedRequest req;
    uint64_t align = bdrv_get_align(bs);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, true);
    }

    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, true);

    if (offset & (align - 1)) {
        /* Unaligned start: read the head block and prepend the part that
         * precedes the caller's data */
        QEMUIOVector head_qiov;
        struct iovec head_iov;

        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base   = head_buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* Unaligned end: read the tail block and append the part that
         * follows the caller's data */
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base   = tail_buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                  align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
    }

    if (use_local_qiov) {
        /* Local buffer may have non-zero data. */
        flags &= ~BDRV_REQ_ZERO_WRITE;
    }
    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

fail:
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    /* qemu_vfree(NULL) is a no-op for the buffers that were never needed */
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);

    return ret;
}
3542
Kevin Wolf66015532013-12-03 14:40:18 +01003543static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3544 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3545 BdrvRequestFlags flags)
3546{
Peter Lieven75af1f32015-02-06 11:54:11 +01003547 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf66015532013-12-03 14:40:18 +01003548 return -EINVAL;
3549 }
3550
3551 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3552 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3553}
3554
/* Public coroutine write entry point: plain write, no extra flags. */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
}

/*
 * Public coroutine zero-write entry point.
 *
 * Aligned requests go straight to bdrv_co_do_writev() with a NULL qiov and
 * BDRV_REQ_ZERO_WRITE; unaligned requests fall back to writing an explicitly
 * zero-filled buffer so the RMW padding path sees real data.
 */
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors,
                                      BdrvRequestFlags flags)
{
    int ret;

    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);

    /* Only honour MAY_UNMAP if the device was opened with discard support */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        flags &= ~BDRV_REQ_MAY_UNMAP;
    }
    if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
                            nb_sectors << BDRV_SECTOR_BITS)) {
        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
                                BDRV_REQ_ZERO_WRITE | flags);
    } else {
        uint8_t *buf;
        QEMUIOVector local_qiov;
        size_t bytes = nb_sectors << BDRV_SECTOR_BITS;

        buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
        memset(buf, 0, bytes);
        qemu_iovec_init(&local_qiov, 1);
        qemu_iovec_add(&local_qiov, buf, bytes);

        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
                                BDRV_REQ_ZERO_WRITE | flags);
        qemu_vfree(buf);
    }
    return ret;
}
3594
bellard83f64092006-08-01 16:21:11 +00003595/**
bellard83f64092006-08-01 16:21:11 +00003596 * Truncate file to 'offset' bytes (needed only for file protocols)
3597 */
3598int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3599{
3600 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003601 int ret;
bellard83f64092006-08-01 16:21:11 +00003602 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003603 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003604 if (!drv->bdrv_truncate)
3605 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003606 if (bs->read_only)
3607 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003608
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003609 ret = drv->bdrv_truncate(bs, offset);
3610 if (ret == 0) {
3611 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003612 if (bs->blk) {
3613 blk_dev_resize_cb(bs->blk);
3614 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003615 }
3616 return ret;
bellard83f64092006-08-01 16:21:11 +00003617}
3618
3619/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003620 * Length of a allocated file in bytes. Sparse files are counted by actual
3621 * allocated space. Return < 0 if error or unknown.
3622 */
3623int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3624{
3625 BlockDriver *drv = bs->drv;
3626 if (!drv) {
3627 return -ENOMEDIUM;
3628 }
3629 if (drv->bdrv_get_allocated_file_size) {
3630 return drv->bdrv_get_allocated_file_size(bs);
3631 }
3632 if (bs->file) {
3633 return bdrv_get_allocated_file_size(bs->file);
3634 }
3635 return -ENOTSUP;
3636}
3637
3638/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003639 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003640 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003641int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003642{
3643 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003644
bellard83f64092006-08-01 16:21:11 +00003645 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003646 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003647
Kevin Wolfb94a2612013-10-29 12:18:58 +01003648 if (drv->has_variable_length) {
3649 int ret = refresh_total_sectors(bs, bs->total_sectors);
3650 if (ret < 0) {
3651 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003652 }
bellard83f64092006-08-01 16:21:11 +00003653 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003654 return bs->total_sectors;
3655}
3656
3657/**
3658 * Return length in bytes on success, -errno on error.
3659 * The length is always a multiple of BDRV_SECTOR_SIZE.
3660 */
3661int64_t bdrv_getlength(BlockDriverState *bs)
3662{
3663 int64_t ret = bdrv_nb_sectors(bs);
3664
3665 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003666}
3667
bellard19cb3732006-08-19 11:45:59 +00003668/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003669void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003670{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003671 int64_t nb_sectors = bdrv_nb_sectors(bs);
3672
3673 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003674}
bellardcf989512004-02-16 21:56:36 +00003675
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003676void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3677 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003678{
3679 bs->on_read_error = on_read_error;
3680 bs->on_write_error = on_write_error;
3681}
3682
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003683BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003684{
3685 return is_read ? bs->on_read_error : bs->on_write_error;
3686}
3687
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003688BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3689{
3690 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3691
3692 switch (on_err) {
3693 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003694 return (error == ENOSPC) ?
3695 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003696 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003697 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003698 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003699 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003700 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003701 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003702 default:
3703 abort();
3704 }
3705}
3706
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003707static void send_qmp_error_event(BlockDriverState *bs,
3708 BlockErrorAction action,
3709 bool is_read, int error)
3710{
Peter Maydell573742a2014-10-10 20:33:03 +01003711 IoOperationType optype;
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003712
Peter Maydell573742a2014-10-10 20:33:03 +01003713 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3714 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003715 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003716 error == ENOSPC, strerror(error),
3717 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003718}
3719
/* Carry out @action for a failed request on @bs and emit the QMP event.
 *
 * This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 *
 * @action: action previously decided on (e.g. by bdrv_get_error_action())
 * @is_read: true if the failing operation was a read
 * @error: positive errno value describing the failure
 */
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
                       bool is_read, int error)
{
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        bdrv_iostatus_set_err(bs, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects.  First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop.  In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(bs, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(bs, action, is_read, error);
    }
}
3751
/* Return non-zero if @bs was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
3756
/* Return non-zero if @bs is a SCSI generic device (bs->sg flag). */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
3761
/* Return non-zero if the write cache (writeback mode) is enabled on @bs. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
3766
Paolo Bonzini425b0142012-06-06 00:04:52 +02003767void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3768{
3769 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003770
3771 /* so a reopen() will preserve wce */
3772 if (wce) {
3773 bs->open_flags |= BDRV_O_CACHE_WB;
3774 } else {
3775 bs->open_flags &= ~BDRV_O_CACHE_WB;
3776 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003777}
3778
bellardea2384d2004-08-01 21:59:26 +00003779int bdrv_is_encrypted(BlockDriverState *bs)
3780{
3781 if (bs->backing_hd && bs->backing_hd->encrypted)
3782 return 1;
3783 return bs->encrypted;
3784}
3785
aliguoric0f4ce72009-03-05 23:01:01 +00003786int bdrv_key_required(BlockDriverState *bs)
3787{
3788 BlockDriverState *backing_hd = bs->backing_hd;
3789
3790 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3791 return 1;
3792 return (bs->encrypted && !bs->valid_key);
3793}
3794
/* Set the encryption key of @bs (and of its encrypted backing file first,
 * if any).  Returns 0 on success, -errno on failure.  On the first
 * successful key, the deferred media-change callback is fired. */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    /* Propagate the key down the chain before unlocking this node. */
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        /* Only the backing file needed the key: done. */
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        /* Transition from "locked" to "usable". */
        bs->valid_key = 1;
        if (bs->blk) {
            /* call the change callback now, we skipped it on open */
            blk_dev_change_media_cb(bs->blk, true);
        }
    }
    return ret;
}
3822
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003823/*
3824 * Provide an encryption key for @bs.
3825 * If @key is non-null:
3826 * If @bs is not encrypted, fail.
3827 * Else if the key is invalid, fail.
3828 * Else set @bs's key to @key, replacing the existing key, if any.
3829 * If @key is null:
3830 * If @bs is encrypted and still lacks a key, fail.
3831 * Else do nothing.
3832 * On failure, store an error object through @errp if non-null.
3833 */
3834void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
3835{
3836 if (key) {
3837 if (!bdrv_is_encrypted(bs)) {
Alberto Garcia81e5f782015-04-08 12:29:19 +03003838 error_setg(errp, "Node '%s' is not encrypted",
3839 bdrv_get_device_or_node_name(bs));
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003840 } else if (bdrv_set_key(bs, key) < 0) {
3841 error_set(errp, QERR_INVALID_PASSWORD);
3842 }
3843 } else {
3844 if (bdrv_key_required(bs)) {
Markus Armbrusterb1ca6392015-01-29 10:37:01 +01003845 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3846 "'%s' (%s) is encrypted",
Alberto Garcia81e5f782015-04-08 12:29:19 +03003847 bdrv_get_device_or_node_name(bs),
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003848 bdrv_get_encrypted_filename(bs));
3849 }
3850 }
3851}
3852
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003853const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003854{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003855 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003856}
3857
/* qsort() comparator for an array of 'const char *': order by string value.
 *
 * qsort() hands the comparator pointers to the array *elements* (i.e.
 * char **), so they must be dereferenced before comparing; calling
 * strcmp() on them directly would compare the pointer representations,
 * not the strings. */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(char *const *)a, *(char *const *)b);
}
3862
ths5fafdf22007-09-16 21:08:06 +00003863void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003864 void *opaque)
3865{
3866 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003867 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003868 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003869 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003870
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003871 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003872 if (drv->format_name) {
3873 bool found = false;
3874 int i = count;
3875 while (formats && i && !found) {
3876 found = !strcmp(formats[--i], drv->format_name);
3877 }
3878
3879 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003880 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003881 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003882 }
3883 }
bellardea2384d2004-08-01 21:59:26 +00003884 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003885
3886 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3887
3888 for (i = 0; i < count; i++) {
3889 it(opaque, formats[i]);
3890 }
3891
Jeff Codye855e4f2014-04-28 18:29:54 -04003892 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003893}
3894
Benoît Canetdc364f42014-01-23 21:31:32 +01003895/* This function is to find a node in the bs graph */
3896BlockDriverState *bdrv_find_node(const char *node_name)
3897{
3898 BlockDriverState *bs;
3899
3900 assert(node_name);
3901
3902 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3903 if (!strcmp(node_name, bs->node_name)) {
3904 return bs;
3905 }
3906 }
3907 return NULL;
3908}
3909
Benoît Canetc13163f2014-01-23 21:31:34 +01003910/* Put this QMP function here so it can access the static graph_bdrv_states. */
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003911BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
Benoît Canetc13163f2014-01-23 21:31:34 +01003912{
3913 BlockDeviceInfoList *list, *entry;
3914 BlockDriverState *bs;
3915
3916 list = NULL;
3917 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003918 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
3919 if (!info) {
3920 qapi_free_BlockDeviceInfoList(list);
3921 return NULL;
3922 }
Benoît Canetc13163f2014-01-23 21:31:34 +01003923 entry = g_malloc0(sizeof(*entry));
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003924 entry->value = info;
Benoît Canetc13163f2014-01-23 21:31:34 +01003925 entry->next = list;
3926 list = entry;
3927 }
3928
3929 return list;
3930}
3931
Benoît Canet12d3ba82014-01-23 21:31:35 +01003932BlockDriverState *bdrv_lookup_bs(const char *device,
3933 const char *node_name,
3934 Error **errp)
3935{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003936 BlockBackend *blk;
3937 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003938
Benoît Canet12d3ba82014-01-23 21:31:35 +01003939 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02003940 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003941
Markus Armbruster7f06d472014-10-07 13:59:12 +02003942 if (blk) {
3943 return blk_bs(blk);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003944 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003945 }
3946
Benoît Canetdd67fa52014-02-12 17:15:06 +01003947 if (node_name) {
3948 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003949
Benoît Canetdd67fa52014-02-12 17:15:06 +01003950 if (bs) {
3951 return bs;
3952 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003953 }
3954
Benoît Canetdd67fa52014-02-12 17:15:06 +01003955 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3956 device ? device : "",
3957 node_name ? node_name : "");
3958 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003959}
3960
Jeff Cody5a6684d2014-06-25 15:40:09 -04003961/* If 'base' is in the same chain as 'top', return true. Otherwise,
3962 * return false. If either argument is NULL, return false. */
3963bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3964{
3965 while (top && top != base) {
3966 top = top->backing_hd;
3967 }
3968
3969 return top != NULL;
3970}
3971
Fam Zheng04df7652014-10-31 11:32:54 +08003972BlockDriverState *bdrv_next_node(BlockDriverState *bs)
3973{
3974 if (!bs) {
3975 return QTAILQ_FIRST(&graph_bdrv_states);
3976 }
3977 return QTAILQ_NEXT(bs, node_list);
3978}
3979
Markus Armbruster2f399b02010-06-02 18:55:20 +02003980BlockDriverState *bdrv_next(BlockDriverState *bs)
3981{
3982 if (!bs) {
3983 return QTAILQ_FIRST(&bdrv_states);
3984 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003985 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003986}
3987
/* Return the node name of @bs. */
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
    return bs->node_name;
}
3992
Markus Armbruster7f06d472014-10-07 13:59:12 +02003993/* TODO check what callers really want: bs->node_name or blk_name() */
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003994const char *bdrv_get_device_name(const BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003995{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003996 return bs->blk ? blk_name(bs->blk) : "";
bellardea2384d2004-08-01 21:59:26 +00003997}
3998
Alberto Garcia9b2aa842015-04-08 12:29:18 +03003999/* This can be used to identify nodes that might not have a device
4000 * name associated. Since node and device names live in the same
4001 * namespace, the result is unambiguous. The exception is if both are
4002 * absent, then this returns an empty (non-null) string. */
4003const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
4004{
4005 return bs->blk ? blk_name(bs->blk) : bs->node_name;
4006}
4007
/* Return the BDRV_O_* flags @bs is currently open with. */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}
4012
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02004013int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00004014{
4015 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02004016 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00004017
Benoît Canetdc364f42014-01-23 21:31:32 +01004018 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02004019 AioContext *aio_context = bdrv_get_aio_context(bs);
4020 int ret;
4021
4022 aio_context_acquire(aio_context);
4023 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02004024 if (ret < 0 && !result) {
4025 result = ret;
4026 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02004027 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01004028 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02004029
4030 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00004031}
4032
/* Helper usable as a driver's bdrv_has_zero_init implementation when the
 * format always reads back as zeroes after creation: answers 1 (true). */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
4037
Kevin Wolff2feebb2010-04-14 17:30:35 +02004038int bdrv_has_zero_init(BlockDriverState *bs)
4039{
4040 assert(bs->drv);
4041
Paolo Bonzini11212d82013-09-04 19:00:27 +02004042 /* If BS is a copy on write image, it is initialized to
4043 the contents of the base image, which may not be zeroes. */
4044 if (bs->backing_hd) {
4045 return 0;
4046 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02004047 if (bs->drv->bdrv_has_zero_init) {
4048 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02004049 }
4050
Peter Lieven3ac21622013-06-28 12:47:42 +02004051 /* safe default */
4052 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02004053}
4054
Peter Lieven4ce78692013-10-24 12:06:54 +02004055bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
4056{
4057 BlockDriverInfo bdi;
4058
4059 if (bs->backing_hd) {
4060 return false;
4061 }
4062
4063 if (bdrv_get_info(bs, &bdi) == 0) {
4064 return bdi.unallocated_blocks_are_zero;
4065 }
4066
4067 return false;
4068}
4069
4070bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
4071{
4072 BlockDriverInfo bdi;
4073
4074 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
4075 return false;
4076 }
4077
4078 if (bdrv_get_info(bs, &bdi) == 0) {
4079 return bdi.can_write_zeroes_with_unmap;
4080 }
4081
4082 return false;
4083}
4084
/* Argument/result bundle passed to bdrv_get_block_status_co_entry() so the
 * synchronous wrapper can drive the coroutine implementation. */
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;    /* node to query */
    BlockDriverState *base;  /* NOTE(review): not used by the code visible
                              * here — presumably for *_above() variants */
    int64_t sector_num;      /* first sector to query */
    int nb_sectors;          /* maximum number of sectors to inspect */
    int *pnum;               /* out: sectors known to share one state */
    int64_t ret;             /* out: BDRV_BLOCK_* flags or -errno */
    bool done;               /* out: completion flag polled by the caller */
} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004094
/*
 * Returns the allocation status of the specified sectors.
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 *
 * The return value is a combination of BDRV_BLOCK_* flags (or -errno),
 * refined in several passes below.
 */
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
                                                     int64_t sector_num,
                                                     int nb_sectors, int *pnum)
{
    int64_t total_sectors;
    int64_t n;
    int64_t ret, ret2;

    total_sectors = bdrv_nb_sectors(bs);
    if (total_sectors < 0) {
        return total_sectors;
    }

    /* Query starts beyond EOF: nothing to report. */
    if (sector_num >= total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* Clamp the request to the end of the image. */
    n = total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    /* Driver without the hook: report everything as allocated data. */
    if (!bs->drv->bdrv_co_get_block_status) {
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
        }
        return ret;
    }

    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret < 0) {
        *pnum = 0;
        return ret;
    }

    /* RAW means "ask the protocol layer at the reported offset instead". */
    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                     *pnum, pnum);
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    }

    /* Unallocated in this layer: it may still be known-zero, either
     * because the driver says so or because it lies past the end of a
     * (shorter) backing file. */
    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing_hd) {
            BlockDriverState *bs2 = bs->backing_hd;
            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    /* For data with a valid offset, consult the protocol layer too. */
    if (bs->file &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int file_pnum;

        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                        *pnum, &file_pnum);
        if (ret2 >= 0) {
            /* Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
            if (!file_pnum) {
                /* !file_pnum indicates an offset at or beyond the EOF; it is
                 * perfectly valid for the format block driver to point to such
                 * offsets, so catch it and mark everything as zero */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

    return ret;
}
4196
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004197/* Coroutine wrapper for bdrv_get_block_status() */
4198static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004199{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004200 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004201 BlockDriverState *bs = data->bs;
4202
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004203 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4204 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004205 data->done = true;
4206}
4207
4208/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004209 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004210 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004211 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004212 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004213int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4214 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004215{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004216 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004217 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004218 .bs = bs,
4219 .sector_num = sector_num,
4220 .nb_sectors = nb_sectors,
4221 .pnum = pnum,
4222 .done = false,
4223 };
4224
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004225 if (qemu_in_coroutine()) {
4226 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004227 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004228 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004229 AioContext *aio_context = bdrv_get_aio_context(bs);
4230
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004231 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004232 qemu_coroutine_enter(co, &data);
4233 while (!data.done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004234 aio_poll(aio_context, true);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004235 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004236 }
4237 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00004238}
4239
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004240int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4241 int nb_sectors, int *pnum)
4242{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004243 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4244 if (ret < 0) {
4245 return ret;
4246 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004247 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004248}
4249
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (inclusive).  BASE can be NULL to check if the given
 * sector is allocated in any image of the chain.  Return false otherwise,
 * or a negative errno on error.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t sector_num,
                            int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    /* n tracks how many sectors are provably unallocated in *every* layer
     * inspected so far. */
    int ret, n = nb_sectors;

    intermediate = top;
    while (intermediate && intermediate != base) {
        int pnum_inter;
        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
                                &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            /* Allocated in this layer: done. */
            *pnum = pnum_inter;
            return 1;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nr_sectors] allocated.
         *
         * So shrink n to the unallocated run of this layer, except when the
         * run merely hit this layer's EOF (a shorter intermediate image does
         * not constrain layers below it).
         */
        if (n > pnum_inter &&
            (intermediate == top ||
             sector_num + pnum_inter < intermediate->total_sectors)) {
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    *pnum = n;
    return 0;
}
4300
aliguori045df332009-03-05 23:00:48 +00004301const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4302{
4303 if (bs->backing_hd && bs->backing_hd->encrypted)
4304 return bs->backing_file;
4305 else if (bs->encrypted)
4306 return bs->filename;
4307 else
4308 return NULL;
4309}
4310
/* Copy the backing file name of @bs into @filename, bounded to
 * @filename_size bytes (bounded copy via pstrcpy). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
4316
/* Write @nb_sectors starting at @sector_num as a compressed cluster.
 * Returns 0 on success, -errno on failure (-ENOTSUP when the driver has
 * no compressed-write support). */
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!drv->bdrv_write_compressed) {
        return -ENOTSUP;
    }
    ret = bdrv_check_request(bs, sector_num, nb_sectors);
    if (ret < 0) {
        return ret;
    }

    /* NOTE(review): compressed writes appear to be incompatible with
     * active dirty bitmaps — confirm against callers. */
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
ths3b46e622007-09-17 08:09:54 +00004338
bellardfaea38e2006-08-05 21:31:00 +00004339int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4340{
4341 BlockDriver *drv = bs->drv;
4342 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004343 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004344 if (!drv->bdrv_get_info)
4345 return -ENOTSUP;
4346 memset(bdi, 0, sizeof(*bdi));
4347 return drv->bdrv_get_info(bs, bdi);
4348}
4349
Max Reitzeae041f2013-10-09 10:46:16 +02004350ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4351{
4352 BlockDriver *drv = bs->drv;
4353 if (drv && drv->bdrv_get_specific_info) {
4354 return drv->bdrv_get_specific_info(bs);
4355 }
4356 return NULL;
4357}
4358
Christoph Hellwig45566e92009-07-10 23:11:57 +02004359int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4360 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004361{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004362 QEMUIOVector qiov;
4363 struct iovec iov = {
4364 .iov_base = (void *) buf,
4365 .iov_len = size,
4366 };
4367
4368 qemu_iovec_init_external(&qiov, &iov, 1);
4369 return bdrv_writev_vmstate(bs, &qiov, pos);
4370}
4371
4372int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4373{
aliguori178e08a2009-04-05 19:10:55 +00004374 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004375
4376 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004377 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004378 } else if (drv->bdrv_save_vmstate) {
4379 return drv->bdrv_save_vmstate(bs, qiov, pos);
4380 } else if (bs->file) {
4381 return bdrv_writev_vmstate(bs->file, qiov, pos);
4382 }
4383
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004384 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004385}
4386
Christoph Hellwig45566e92009-07-10 23:11:57 +02004387int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4388 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004389{
4390 BlockDriver *drv = bs->drv;
4391 if (!drv)
4392 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004393 if (drv->bdrv_load_vmstate)
4394 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4395 if (bs->file)
4396 return bdrv_load_vmstate(bs->file, buf, pos, size);
4397 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004398}
4399
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004400void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4401{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004402 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004403 return;
4404 }
4405
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004406 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004407}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004408
Kevin Wolf41c695c2012-12-06 14:32:58 +01004409int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4410 const char *tag)
4411{
4412 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4413 bs = bs->file;
4414 }
4415
4416 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4417 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4418 }
4419
4420 return -ENOTSUP;
4421}
4422
Fam Zheng4cc70e92013-11-20 10:01:54 +08004423int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4424{
4425 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4426 bs = bs->file;
4427 }
4428
4429 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4430 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4431 }
4432
4433 return -ENOTSUP;
4434}
4435
Kevin Wolf41c695c2012-12-06 14:32:58 +01004436int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4437{
Max Reitz938789e2014-03-10 23:44:08 +01004438 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004439 bs = bs->file;
4440 }
4441
4442 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4443 return bs->drv->bdrv_debug_resume(bs, tag);
4444 }
4445
4446 return -ENOTSUP;
4447}
4448
4449bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4450{
4451 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4452 bs = bs->file;
4453 }
4454
4455 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4456 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4457 }
4458
4459 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004460}
4461
Blue Swirl199630b2010-07-25 20:49:34 +00004462int bdrv_is_snapshot(BlockDriverState *bs)
4463{
4464 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4465}
4466
/* backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain. */
/* Search the backing chain of @bs for the node whose filename matches
 * @backing_file, and return that node's backing BDS (i.e. the image the
 * matching backing_file reference points at), or NULL when not found.
 * Protocol-style names are compared verbatim; plain paths are first made
 * relative to each image's own location and canonicalised via realpath(). */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    /* Scratch buffers for path canonicalisation; freed on every exit path. */
    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp     = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {
                /* realpath failure (e.g. dangling path): not a match here */
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    return retval;
}
4532
Benoît Canetf198fd12012-08-02 10:22:47 +02004533int bdrv_get_backing_file_depth(BlockDriverState *bs)
4534{
4535 if (!bs->drv) {
4536 return 0;
4537 }
4538
4539 if (!bs->backing_hd) {
4540 return 0;
4541 }
4542
4543 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4544}
4545
bellard83f64092006-08-01 16:21:11 +00004546/**************************************************************/
4547/* async I/Os */
4548
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004549BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4550 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004551 BlockCompletionFunc *cb, void *opaque)
aliguori3b69e4b2009-01-22 16:59:24 +00004552{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004553 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4554
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004555 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004556 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004557}
4558
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004559BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4560 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004561 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004562{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004563 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4564
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004565 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004566 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004567}
4568
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004569BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004570 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004571 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004572{
4573 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4574
4575 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4576 BDRV_REQ_ZERO_WRITE | flags,
4577 cb, opaque, true);
4578}
4579
Kevin Wolf40b4f532009-09-09 17:53:37 +02004580
/* Tracks completion of a batch of merged write requests submitted via
 * bdrv_aio_multiwrite().  One MultiwriteCB is shared by all requests of
 * the batch; the per-caller completion data lives in callbacks[]. */
typedef struct MultiwriteCB {
    int error;          /* first error seen among the batch, or 0 */
    int num_requests;   /* submitted-but-not-yet-completed requests */
    int num_callbacks;  /* number of original (pre-merge) requests */
    struct {
        BlockCompletionFunc *cb;    /* caller's completion callback */
        void *opaque;               /* caller's opaque for cb */
        QEMUIOVector *free_qiov;    /* merged qiov to destroy/free, or NULL */
    } callbacks[];      /* flexible array, one slot per original request */
} MultiwriteCB;
4591
4592static void multiwrite_user_cb(MultiwriteCB *mcb)
4593{
4594 int i;
4595
4596 for (i = 0; i < mcb->num_callbacks; i++) {
4597 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004598 if (mcb->callbacks[i].free_qiov) {
4599 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4600 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004601 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004602 }
4603}
4604
4605static void multiwrite_cb(void *opaque, int ret)
4606{
4607 MultiwriteCB *mcb = opaque;
4608
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004609 trace_multiwrite_cb(mcb, ret);
4610
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004611 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004612 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004613 }
4614
4615 mcb->num_requests--;
4616 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004617 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004618 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004619 }
4620}
4621
4622static int multiwrite_req_compare(const void *a, const void *b)
4623{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004624 const BlockRequest *req1 = a, *req2 = b;
4625
4626 /*
4627 * Note that we can't simply subtract req2->sector from req1->sector
4628 * here as that could overflow the return value.
4629 */
4630 if (req1->sector > req2->sector) {
4631 return 1;
4632 } else if (req1->sector < req2->sector) {
4633 return -1;
4634 } else {
4635 return 0;
4636 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004637}
4638
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 *
 * Merging is in-place: reqs[0..result-1] describe the surviving requests,
 * and any freshly allocated merged qiov is recorded in mcb so it can be
 * released when the batch completes.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // Don't exceed the iovec limit of a single request.
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        // Respect the device's maximum transfer length, if it has one.
        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);

            // We should not need to add any zeros between the two requests:
            // the sort plus the merge condition guarantee they touch.
            assert (reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);

            // Add tail of first request, if necessary
            if (qiov->size < reqs[outidx].qiov->size) {
                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
                                  reqs[outidx].qiov->size - qiov->size);
            }

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Remember the merged qiov so multiwrite_user_cb() frees it.
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector     = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov       = reqs[i].qiov;
        }
    }

    // Account the number of requests that merging eliminated.
    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);

    return outidx + 1;
}
4711
4712/*
4713 * Submit multiple AIO write requests at once.
4714 *
4715 * On success, the function returns 0 and all requests in the reqs array have
4716 * been submitted. In error case this function returns -1, and any of the
4717 * requests may or may not be submitted yet. In particular, this means that the
4718 * callback will be called for some of the requests, for others it won't. The
4719 * caller must check the error field of the BlockRequest to wait for the right
4720 * callbacks (if error != 0, no callback will be called).
4721 *
4722 * The implementation may modify the contents of the reqs array, e.g. to merge
4723 * requests. However, the fields opaque and error are left unmodified as they
4724 * are used to signal failure for a single request to the caller.
4725 */
4726int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4727{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004728 MultiwriteCB *mcb;
4729 int i;
4730
Ryan Harper301db7c2011-03-07 10:01:04 -06004731 /* don't submit writes if we don't have a medium */
4732 if (bs->drv == NULL) {
4733 for (i = 0; i < num_reqs; i++) {
4734 reqs[i].error = -ENOMEDIUM;
4735 }
4736 return -1;
4737 }
4738
Kevin Wolf40b4f532009-09-09 17:53:37 +02004739 if (num_reqs == 0) {
4740 return 0;
4741 }
4742
4743 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004744 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004745 mcb->num_requests = 0;
4746 mcb->num_callbacks = num_reqs;
4747
4748 for (i = 0; i < num_reqs; i++) {
4749 mcb->callbacks[i].cb = reqs[i].cb;
4750 mcb->callbacks[i].opaque = reqs[i].opaque;
4751 }
4752
4753 // Check for mergable requests
4754 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4755
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004756 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4757
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004758 /* Run the aio requests. */
4759 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004760 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004761 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4762 reqs[i].nb_sectors, reqs[i].flags,
4763 multiwrite_cb, mcb,
4764 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004765 }
4766
4767 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004768}
4769
/* Synchronously cancel an AIOCB: request asynchronous cancellation, then
 * poll the request's AioContext until the operation has completed (the
 * completion callback runs before this returns).  The extra ref keeps
 * the acb alive while we spin on its refcount. */
void bdrv_aio_cancel(BlockAIOCB *acb)
{
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    /* refcnt drops to our single reference once the request completes. */
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            /* No way to find the context to poll: cannot make progress. */
            abort();
        }
    }
    qemu_aio_unref(acb);
}
4785
4786/* Async version of aio cancel. The caller is not blocked if the acb implements
4787 * cancel_async, otherwise we do nothing and let the request normally complete.
4788 * In either case the completion callback must be called. */
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004789void bdrv_aio_cancel_async(BlockAIOCB *acb)
Fam Zheng02c50ef2014-09-11 13:41:09 +08004790{
4791 if (acb->aiocb_info->cancel_async) {
4792 acb->aiocb_info->cancel_async(acb);
4793 }
bellard83f64092006-08-01 16:21:11 +00004794}
4795
4796/**************************************************************/
4797/* async block device emulation */
4798
/* AIOCB used to emulate asynchronous I/O on top of a driver's synchronous
 * bdrv_read/bdrv_write: the sync call runs immediately and the completion
 * callback is deferred to a bottom half. */
typedef struct BlockAIOCBSync {
    BlockAIOCB common;      /* must be first: public AIOCB part */
    QEMUBH *bh;             /* bottom half that delivers the completion */
    int ret;                /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov;     /* caller's scatter/gather list */
    uint8_t *bounce;        /* linear bounce buffer (NULL if alloc failed) */
    int is_write;           /* non-zero for writes */
} BlockAIOCBSync;

static const AIOCBInfo bdrv_em_aiocb_info = {
    .aiocb_size         = sizeof(BlockAIOCBSync),
};
4812
/* Bottom half completing a synchronous-emulation request: copy read data
 * back into the caller's qiov, release the bounce buffer, run the
 * completion callback, then tear down the bh and drop our reference. */
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockAIOCBSync *acb = opaque;

    /* Only successful reads have data to copy out of the bounce buffer. */
    if (!acb->is_write && acb->ret >= 0) {
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_unref(acb);
}
bellardbeac80c2006-06-26 20:08:57 +00004826
/* Emulate async vectored I/O using the driver's synchronous bdrv_read /
 * bdrv_write callbacks: the I/O happens here, via a linear bounce buffer,
 * and completion is reported from a scheduled bottom half.
 * A failed bounce allocation is reported as -ENOMEM through the same
 * completion path. */
static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                      int64_t sector_num,
                                      QEMUIOVector *qiov,
                                      int nb_sectors,
                                      BlockCompletionFunc *cb,
                                      void *opaque,
                                      int is_write)

{
    BlockAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* try_blockalign may return NULL on OOM; handled below */
    acb->bounce = qemu_try_blockalign(bs, qiov->size);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);

    if (acb->bounce == NULL) {
        acb->ret = -ENOMEM;
    } else if (is_write) {
        /* Linearize the caller's qiov, then do the synchronous write. */
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    /* Deliver completion from the bh, as AIO semantics require. */
    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
4857
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004858static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004859 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004860 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004861{
aliguorif141eaf2009-04-07 18:43:24 +00004862 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004863}
4864
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004865static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004866 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004867 BlockCompletionFunc *cb, void *opaque)
aliguorif141eaf2009-04-07 18:43:24 +00004868{
4869 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4870}
4871
Kevin Wolf68485422011-06-30 10:05:46 +02004872
/* AIOCB backing the coroutine-based AIO entry points (readv/writev/flush/
 * discard).  If the coroutine finishes before the submitter returns, the
 * completion is deferred to a bottom half; otherwise it is delivered
 * directly from the coroutine (see bdrv_co_complete / need_bh). */
typedef struct BlockAIOCBCoroutine {
    BlockAIOCB common;  /* must be first: public AIOCB part */
    BlockRequest req;   /* request parameters; req.error carries the result
                         * (-EINPROGRESS while the coroutine is running) */
    bool is_write;
    bool need_bh;       /* true until the submitter has returned; forces
                         * completion through a bottom half */
    bool *done;         /* NOTE(review): not referenced in this chunk --
                         * looks like a leftover field; verify before removal */
    QEMUBH* bh;
} BlockAIOCBCoroutine;

static const AIOCBInfo bdrv_em_co_aiocb_info = {
    .aiocb_size         = sizeof(BlockAIOCBCoroutine),
};
4885
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004886static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
4887{
4888 if (!acb->need_bh) {
4889 acb->common.cb(acb->common.opaque, acb->req.error);
4890 qemu_aio_unref(acb);
4891 }
4892}
4893
Paolo Bonzini35246a62011-10-14 10:41:29 +02004894static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004895{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004896 BlockAIOCBCoroutine *acb = opaque;
Kevin Wolf68485422011-06-30 10:05:46 +02004897
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004898 assert(!acb->need_bh);
Kevin Wolf68485422011-06-30 10:05:46 +02004899 qemu_bh_delete(acb->bh);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004900 bdrv_co_complete(acb);
4901}
4902
/* Called by the submitter after entering the request coroutine.  Clears
 * need_bh (direct delivery from the coroutine is safe from now on) and,
 * if the request already finished while need_bh was still set, schedules
 * the deferred completion bottom half. */
static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
{
    acb->need_bh = false;
    /* req.error left -EINPROGRESS means the coroutine is still running
     * and will call bdrv_co_complete() itself later. */
    if (acb->req.error != -EINPROGRESS) {
        BlockDriverState *bs = acb->common.bs;

        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
        qemu_bh_schedule(acb->bh);
    }
}
4913
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004914/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4915static void coroutine_fn bdrv_co_do_rw(void *opaque)
4916{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004917 BlockAIOCBCoroutine *acb = opaque;
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004918 BlockDriverState *bs = acb->common.bs;
4919
4920 if (!acb->is_write) {
4921 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004922 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004923 } else {
4924 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004925 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004926 }
4927
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004928 bdrv_co_complete(acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004929}
4930
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004931static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4932 int64_t sector_num,
4933 QEMUIOVector *qiov,
4934 int nb_sectors,
4935 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004936 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004937 void *opaque,
4938 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004939{
4940 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004941 BlockAIOCBCoroutine *acb;
Kevin Wolf68485422011-06-30 10:05:46 +02004942
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004943 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004944 acb->need_bh = true;
4945 acb->req.error = -EINPROGRESS;
Kevin Wolf68485422011-06-30 10:05:46 +02004946 acb->req.sector = sector_num;
4947 acb->req.nb_sectors = nb_sectors;
4948 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004949 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004950 acb->is_write = is_write;
4951
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004952 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004953 qemu_coroutine_enter(co, acb);
4954
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004955 bdrv_co_maybe_schedule_bh(acb);
Kevin Wolf68485422011-06-30 10:05:46 +02004956 return &acb->common;
4957}
4958
Paolo Bonzini07f07612011-10-17 12:32:12 +02004959static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004960{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004961 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004962 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004963
Paolo Bonzini07f07612011-10-17 12:32:12 +02004964 acb->req.error = bdrv_co_flush(bs);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004965 bdrv_co_complete(acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004966}
4967
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004968BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004969 BlockCompletionFunc *cb, void *opaque)
Alexander Graf016f5cf2010-05-26 17:51:49 +02004970{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004971 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004972
Paolo Bonzini07f07612011-10-17 12:32:12 +02004973 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004974 BlockAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004975
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004976 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004977 acb->need_bh = true;
4978 acb->req.error = -EINPROGRESS;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004979
Paolo Bonzini07f07612011-10-17 12:32:12 +02004980 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4981 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004982
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004983 bdrv_co_maybe_schedule_bh(acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004984 return &acb->common;
4985}
4986
Paolo Bonzini4265d622011-10-17 12:32:14 +02004987static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4988{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004989 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004990 BlockDriverState *bs = acb->common.bs;
4991
4992 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004993 bdrv_co_complete(acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004994}
4995
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004996BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
Paolo Bonzini4265d622011-10-17 12:32:14 +02004997 int64_t sector_num, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004998 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzini4265d622011-10-17 12:32:14 +02004999{
5000 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005001 BlockAIOCBCoroutine *acb;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005002
5003 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
5004
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01005005 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01005006 acb->need_bh = true;
5007 acb->req.error = -EINPROGRESS;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005008 acb->req.sector = sector_num;
5009 acb->req.nb_sectors = nb_sectors;
5010 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
5011 qemu_coroutine_enter(co, acb);
5012
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01005013 bdrv_co_maybe_schedule_bh(acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005014 return &acb->common;
5015}
5016
/* Register all block drivers (runs their MODULE_INIT_BLOCK constructors). */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
pbrookce1a14d2006-08-07 02:38:06 +00005021
/* Like bdrv_init(), but enable the driver whitelist first so that only
 * whitelisted format drivers are usable. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
5027
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01005028void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02005029 BlockCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00005030{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005031 BlockAIOCB *acb;
pbrookce1a14d2006-08-07 02:38:06 +00005032
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01005033 acb = g_slice_alloc(aiocb_info->aiocb_size);
5034 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00005035 acb->bs = bs;
5036 acb->cb = cb;
5037 acb->opaque = opaque;
Fam Zhengf197fe22014-09-11 13:41:08 +08005038 acb->refcnt = 1;
pbrookce1a14d2006-08-07 02:38:06 +00005039 return acb;
5040}
5041
/* Take an extra reference on an AIOCB (see qemu_aio_unref()). */
void qemu_aio_ref(void *p)
{
    BlockAIOCB *acb = p;
    acb->refcnt++;
}
5047
Fam Zheng80074292014-09-11 13:41:28 +08005048void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00005049{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005050 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08005051 assert(acb->refcnt > 0);
5052 if (--acb->refcnt == 0) {
5053 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
5054 }
pbrookce1a14d2006-08-07 02:38:06 +00005055}
bellard19cb3732006-08-19 11:45:59 +00005056
5057/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005058/* Coroutine block device emulation */
5059
/* Bridges callback-style AIO completion back into a waiting coroutine:
 * the completion callback stores the result and re-enters @coroutine. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine to resume on completion */
    int ret;                /* request result, valid after resumption */
} CoroutineIOCompletion;
5064
5065static void bdrv_co_io_em_complete(void *opaque, int ret)
5066{
5067 CoroutineIOCompletion *co = opaque;
5068
5069 co->ret = ret;
5070 qemu_coroutine_enter(co->coroutine, NULL);
5071}
5072
/* Emulate a coroutine read/write on top of the driver's AIO interface:
 * submit the request, yield until bdrv_co_io_em_complete() re-enters us,
 * and return the request's result (or -EIO if submission failed). */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        /* Submission failed; the callback will never run. */
        return -EIO;
    }
    /* Sleep until bdrv_co_io_em_complete() has filled in co.ret. */
    qemu_coroutine_yield();

    return co.ret;
}
5098
5099static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
5100 int64_t sector_num, int nb_sectors,
5101 QEMUIOVector *iov)
5102{
5103 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
5104}
5105
5106static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
5107 int64_t sector_num, int nb_sectors,
5108 QEMUIOVector *iov)
5109{
5110 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
5111}
5112
Paolo Bonzini07f07612011-10-17 12:32:12 +02005113static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005114{
Paolo Bonzini07f07612011-10-17 12:32:12 +02005115 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005116
Paolo Bonzini07f07612011-10-17 12:32:12 +02005117 rwco->ret = bdrv_co_flush(rwco->bs);
5118}
5119
/* Flush @bs: first push cached data to the OS, then (unless BDRV_O_NO_FLUSH)
 * force it to stable storage, and finally flush the underlying protocol
 * (bs->file) recursively.  Returns 0 on success or a negative errno. */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* Nothing to do without a medium, and a read-only device is clean. */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Fall back to the AIO flush interface, bridged via a coroutine
         * yield just like bdrv_co_io_em(). */
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bdrv_co_flush(bs->file);
}
5182
/* Invalidate cached metadata after inward migration: only acts on devices
 * still flagged BDRV_O_INCOMING, delegates to the driver (or recurses into
 * bs->file), then refreshes the cached total sector count.  Errors are
 * reported through @errp. */
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    Error *local_err = NULL;
    int ret;

    if (!bs->drv) {
        return;
    }

    /* Only devices awaiting migration handover need invalidation. */
    if (!(bs->open_flags & BDRV_O_INCOMING)) {
        return;
    }
    bs->open_flags &= ~BDRV_O_INCOMING;

    if (bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs, &local_err);
    } else if (bs->file) {
        /* No driver hook: recurse into the protocol layer instead. */
        bdrv_invalidate_cache(bs->file, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /* The image size may have changed while we weren't looking. */
    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        return;
    }
}
5213
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005214void bdrv_invalidate_cache_all(Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005215{
5216 BlockDriverState *bs;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005217 Error *local_err = NULL;
Anthony Liguori0f154232011-11-14 15:09:45 -06005218
Benoît Canetdc364f42014-01-23 21:31:32 +01005219 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005220 AioContext *aio_context = bdrv_get_aio_context(bs);
5221
5222 aio_context_acquire(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005223 bdrv_invalidate_cache(bs, &local_err);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005224 aio_context_release(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005225 if (local_err) {
5226 error_propagate(errp, local_err);
5227 return;
5228 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005229 }
5230}
5231
/* Synchronous wrapper around bdrv_co_flush().  Inside a coroutine it calls
 * the entry point directly; otherwise it spawns a coroutine and polls the
 * device's AioContext until the flush completes. */
int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,     /* sentinel: overwritten on completion */
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            /* Drive completions until the coroutine finishes. */
            aio_poll(aio_context, true);
        }
    }

    return rwco.ret;
}
5255
/* Argument/result bundle passed to bdrv_discard_co_entry(). */
typedef struct DiscardCo {
    BlockDriverState *bs;   /* device to discard from */
    int64_t sector_num;     /* first sector of the range */
    int nb_sectors;         /* number of sectors to discard */
    int ret;                /* result; NOT_DONE until completion */
} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005262static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5263{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005264 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005265
5266 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5267}
5268
5269int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5270 int nb_sectors)
5271{
Max Reitzb9c64942015-02-05 13:58:25 -05005272 int max_discard, ret;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005273
Paolo Bonzini4265d622011-10-17 12:32:14 +02005274 if (!bs->drv) {
5275 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05005276 }
5277
5278 ret = bdrv_check_request(bs, sector_num, nb_sectors);
5279 if (ret < 0) {
5280 return ret;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005281 } else if (bs->read_only) {
5282 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005283 }
5284
Fam Zhenge4654d22013-11-13 18:29:43 +08005285 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005286
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005287 /* Do nothing if disabled. */
5288 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5289 return 0;
5290 }
5291
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005292 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005293 return 0;
5294 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005295
Peter Lieven75af1f32015-02-06 11:54:11 +01005296 max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005297 while (nb_sectors > 0) {
5298 int ret;
5299 int num = nb_sectors;
5300
5301 /* align request */
5302 if (bs->bl.discard_alignment &&
5303 num >= bs->bl.discard_alignment &&
5304 sector_num % bs->bl.discard_alignment) {
5305 if (num > bs->bl.discard_alignment) {
5306 num = bs->bl.discard_alignment;
5307 }
5308 num -= sector_num % bs->bl.discard_alignment;
5309 }
5310
5311 /* limit request size */
5312 if (num > max_discard) {
5313 num = max_discard;
5314 }
5315
5316 if (bs->drv->bdrv_co_discard) {
5317 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5318 } else {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005319 BlockAIOCB *acb;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005320 CoroutineIOCompletion co = {
5321 .coroutine = qemu_coroutine_self(),
5322 };
5323
5324 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5325 bdrv_co_io_em_complete, &co);
5326 if (acb == NULL) {
5327 return -EIO;
5328 } else {
5329 qemu_coroutine_yield();
5330 ret = co.ret;
5331 }
5332 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005333 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005334 return ret;
5335 }
5336
5337 sector_num += num;
5338 nb_sectors -= num;
5339 }
5340 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005341}
5342
/* Synchronous wrapper around bdrv_co_discard(), mirroring bdrv_flush():
 * direct call inside a coroutine, otherwise spawn one and poll the
 * device's AioContext until it completes. */
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    DiscardCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,   /* sentinel: overwritten on completion */
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }

    return rwco.ret;
}
5368
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005369/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005370/* removable device support */
5371
5372/**
5373 * Return TRUE if the media is present
5374 */
5375int bdrv_is_inserted(BlockDriverState *bs)
5376{
5377 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005378
bellard19cb3732006-08-19 11:45:59 +00005379 if (!drv)
5380 return 0;
5381 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005382 return 1;
5383 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005384}
5385
5386/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005387 * Return whether the media changed since the last call to this
5388 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005389 */
5390int bdrv_media_changed(BlockDriverState *bs)
5391{
5392 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005393
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005394 if (drv && drv->bdrv_media_changed) {
5395 return drv->bdrv_media_changed(bs);
5396 }
5397 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005398}
5399
5400/**
5401 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5402 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005403void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005404{
5405 BlockDriver *drv = bs->drv;
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005406 const char *device_name;
bellard19cb3732006-08-19 11:45:59 +00005407
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005408 if (drv && drv->bdrv_eject) {
5409 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005410 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005411
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005412 device_name = bdrv_get_device_name(bs);
5413 if (device_name[0] != '\0') {
5414 qapi_event_send_device_tray_moved(device_name,
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02005415 eject_flag, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005416 }
bellard19cb3732006-08-19 11:45:59 +00005417}
5418
bellard19cb3732006-08-19 11:45:59 +00005419/**
5420 * Lock or unlock the media (if it is locked, the user won't be able
5421 * to eject it manually).
5422 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005423void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005424{
5425 BlockDriver *drv = bs->drv;
5426
Markus Armbruster025e8492011-09-06 18:58:47 +02005427 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005428
Markus Armbruster025e8492011-09-06 18:58:47 +02005429 if (drv && drv->bdrv_lock_medium) {
5430 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005431 }
5432}
ths985a03b2007-12-24 16:10:43 +00005433
5434/* needed for generic scsi interface */
5435
5436int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5437{
5438 BlockDriver *drv = bs->drv;
5439
5440 if (drv && drv->bdrv_ioctl)
5441 return drv->bdrv_ioctl(bs, req, buf);
5442 return -ENOTSUP;
5443}
aliguori7d780662009-03-12 19:57:08 +00005444
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005445BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
aliguori221f7152009-03-28 17:28:41 +00005446 unsigned long int req, void *buf,
Markus Armbruster097310b2014-10-07 13:59:15 +02005447 BlockCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005448{
aliguori221f7152009-03-28 17:28:41 +00005449 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005450
aliguori221f7152009-03-28 17:28:41 +00005451 if (drv && drv->bdrv_aio_ioctl)
5452 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5453 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005454}
aliguorie268ca52009-04-22 20:20:00 +00005455
/* Record the guest device's block size (bytes) in bs->guest_block_size. */
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
    bs->guest_block_size = align;
}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005460
aliguorie268ca52009-04-22 20:20:00 +00005461void *qemu_blockalign(BlockDriverState *bs, size_t size)
5462{
Kevin Wolf339064d2013-11-28 10:23:32 +01005463 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005464}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005465
Max Reitz9ebd8442014-10-22 14:09:27 +02005466void *qemu_blockalign0(BlockDriverState *bs, size_t size)
5467{
5468 return memset(qemu_blockalign(bs, size), 0, size);
5469}
5470
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005471void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5472{
5473 size_t align = bdrv_opt_mem_align(bs);
5474
5475 /* Ensure that NULL is never returned on success */
5476 assert(align > 0);
5477 if (size == 0) {
5478 size = align;
5479 }
5480
5481 return qemu_try_memalign(align, size);
5482}
5483
Max Reitz9ebd8442014-10-22 14:09:27 +02005484void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
5485{
5486 void *mem = qemu_try_blockalign(bs, size);
5487
5488 if (mem) {
5489 memset(mem, 0, size);
5490 }
5491
5492 return mem;
5493}
5494
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005495/*
5496 * Check if all memory in this vector is sector aligned.
5497 */
5498bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5499{
5500 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005501 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005502
5503 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005504 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005505 return false;
5506 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005507 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005508 return false;
5509 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005510 }
5511
5512 return true;
5513}
5514
Fam Zheng0db6e542015-04-17 19:49:50 -04005515BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
5516{
5517 BdrvDirtyBitmap *bm;
5518
5519 assert(name);
5520 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5521 if (bm->name && !strcmp(name, bm->name)) {
5522 return bm;
5523 }
5524 }
5525 return NULL;
5526}
5527
/* Strip the bitmap's name, turning it into an anonymous bitmap.
 * Not allowed while the bitmap is frozen (has a successor). */
void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    g_free(bitmap->name);
    bitmap->name = NULL;
}
5534
/* Create a dirty bitmap covering all of @bs.
 *
 * @granularity: bytes tracked per bit; must be a power of two and at least
 *               BDRV_SECTOR_SIZE (asserted via sector_granularity below).
 * @name: optional bitmap name; must be unique on @bs when non-NULL.
 *
 * Returns the new (enabled, unfrozen) bitmap, or NULL with @errp set. */
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
                                          uint32_t granularity,
                                          const char *name,
                                          Error **errp)
{
    int64_t bitmap_size;
    BdrvDirtyBitmap *bitmap;
    uint32_t sector_granularity;

    /* granularity must be a power of two */
    assert((granularity & (granularity - 1)) == 0);

    if (name && bdrv_find_dirty_bitmap(bs, name)) {
        error_setg(errp, "Bitmap already exists: %s", name);
        return NULL;
    }
    sector_granularity = granularity >> BDRV_SECTOR_BITS;
    assert(sector_granularity);
    bitmap_size = bdrv_nb_sectors(bs);
    if (bitmap_size < 0) {
        error_setg_errno(errp, -bitmap_size, "could not get length of device");
        errno = -bitmap_size;
        return NULL;
    }
    bitmap = g_new0(BdrvDirtyBitmap, 1);
    /* hbitmap granularity is expressed as a power-of-two shift in sectors */
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
    bitmap->name = g_strdup(name);
    bitmap->disabled = false;
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
    return bitmap;
}
5565
John Snow9bd2b082015-04-17 19:49:57 -04005566bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
5567{
5568 return bitmap->successor;
5569}
5570
John Snowb8e6fb72015-04-17 19:49:56 -04005571bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
5572{
John Snow9bd2b082015-04-17 19:49:57 -04005573 return !(bitmap->disabled || bitmap->successor);
5574}
5575
/**
 * Create a successor bitmap destined to replace this bitmap after an operation.
 * Requires that the bitmap is not frozen and has no successor.
 *
 * The successor is anonymous, shares @bitmap's granularity, and inherits its
 * enabled/disabled state; installing it freezes @bitmap.  Returns 0 on
 * success, -1 with @errp set on failure.
 */
int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
                                       BdrvDirtyBitmap *bitmap, Error **errp)
{
    uint64_t granularity;
    BdrvDirtyBitmap *child;

    if (bdrv_dirty_bitmap_frozen(bitmap)) {
        error_setg(errp, "Cannot create a successor for a bitmap that is "
                   "currently frozen");
        return -1;
    }
    assert(!bitmap->successor);

    /* Create an anonymous successor */
    granularity = bdrv_dirty_bitmap_granularity(bitmap);
    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
    if (!child) {
        return -1;
    }

    /* Successor will be on or off based on our current state. */
    child->disabled = bitmap->disabled;

    /* Install the successor and freeze the parent */
    bitmap->successor = child;
    return 0;
}
5607
/**
 * For a bitmap with a successor, yield our name to the successor,
 * delete the old bitmap, and return a handle to the new bitmap.
 *
 * Returns NULL with @errp set if @bitmap has no successor.
 */
BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
                                            BdrvDirtyBitmap *bitmap,
                                            Error **errp)
{
    char *name;
    BdrvDirtyBitmap *successor = bitmap->successor;

    if (successor == NULL) {
        error_setg(errp, "Cannot relinquish control if "
                   "there's no successor present");
        return NULL;
    }

    /* Move name ownership to the successor; clearing bitmap->successor
     * un-freezes @bitmap so bdrv_release_dirty_bitmap() may delete it. */
    name = bitmap->name;
    bitmap->name = NULL;
    successor->name = name;
    bitmap->successor = NULL;
    bdrv_release_dirty_bitmap(bs, bitmap);

    return successor;
}
5633
/**
 * In cases of failure where we can no longer safely delete the parent,
 * we may wish to re-join the parent and child/successor.
 * The merged parent will be un-frozen, but not explicitly re-enabled.
 *
 * Returns the (un-frozen) parent, or NULL with @errp set on failure.
 */
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
                                           BdrvDirtyBitmap *parent,
                                           Error **errp)
{
    BdrvDirtyBitmap *successor = parent->successor;

    if (!successor) {
        error_setg(errp, "Cannot reclaim a successor when none is present");
        return NULL;
    }

    /* Fold the successor's dirty bits back into the parent. */
    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
        error_setg(errp, "Merging of parent and successor bitmap failed");
        return NULL;
    }
    /* Release the successor first: parent is still frozen until the
     * successor pointer is cleared below. */
    bdrv_release_dirty_bitmap(bs, successor);
    parent->successor = NULL;

    return parent;
}
5659
/* Remove @bitmap from @bs's list and free it (bit store, name, struct).
 * The bitmap must not be frozen.  A bitmap not found on @bs is ignored. */
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    BdrvDirtyBitmap *bm, *next;
    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
        if (bm == bitmap) {
            assert(!bdrv_dirty_bitmap_frozen(bm));
            QLIST_REMOVE(bitmap, list);
            hbitmap_free(bitmap->bitmap);
            g_free(bitmap->name);
            g_free(bitmap);
            return;
        }
    }
}
5674
/* Stop recording writes into @bitmap; not allowed while frozen. */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = true;
}
5680
/* Resume recording writes into @bitmap; not allowed while frozen. */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    bitmap->disabled = false;
}
5686
/* Build a QAPI BlockDirtyInfoList describing every dirty bitmap on @bs.
 * The caller receives ownership of the returned list. */
BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
{
    BdrvDirtyBitmap *bm;
    BlockDirtyInfoList *list = NULL;
    BlockDirtyInfoList **plist = &list;   /* tail pointer for O(1) append */

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
        info->count = bdrv_get_dirty_count(bs, bm);
        info->granularity = bdrv_dirty_bitmap_granularity(bm);
        info->has_name = !!bm->name;
        info->name = g_strdup(bm->name);
        entry->value = info;
        *plist = entry;
        plist = &entry->next;
    }

    return list;
}
5707
Fam Zhenge4654d22013-11-13 18:29:43 +08005708int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005709{
Fam Zhenge4654d22013-11-13 18:29:43 +08005710 if (bitmap) {
5711 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005712 } else {
5713 return 0;
5714 }
5715}
5716
John Snow341ebc22015-04-17 19:49:52 -04005717/**
5718 * Chooses a default granularity based on the existing cluster size,
5719 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
5720 * is no cluster size information available.
5721 */
5722uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
5723{
5724 BlockDriverInfo bdi;
5725 uint32_t granularity;
5726
5727 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
5728 granularity = MAX(4096, bdi.cluster_size);
5729 granularity = MIN(65536, granularity);
5730 } else {
5731 granularity = 65536;
5732 }
5733
5734 return granularity;
5735}
5736
/* Return @bitmap's granularity in bytes (sector size << hbitmap shift). */
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
{
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
5741
/* Initialize @hbi to iterate over @bitmap's dirty bits from sector 0. */
void bdrv_dirty_iter_init(BlockDriverState *bs,
                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
5747
/* Mark [cur_sector, cur_sector + nr_sectors) dirty in @bitmap.
 * The bitmap must be enabled (neither disabled nor frozen). */
void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
5754
/* Clear [cur_sector, cur_sector + nr_sectors) in @bitmap.
 * The bitmap must be enabled (neither disabled nor frozen). */
void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
5761
5762static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5763 int nr_sectors)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005764{
Fam Zhenge4654d22013-11-13 18:29:43 +08005765 BdrvDirtyBitmap *bitmap;
5766 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
John Snowb8e6fb72015-04-17 19:49:56 -04005767 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5768 continue;
5769 }
Fam Zhenge4654d22013-11-13 18:29:43 +08005770 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005771 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005772}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005773
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005774static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
5775 int nr_sectors)
Fam Zhenge4654d22013-11-13 18:29:43 +08005776{
5777 BdrvDirtyBitmap *bitmap;
5778 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
John Snowb8e6fb72015-04-17 19:49:56 -04005779 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5780 continue;
5781 }
Fam Zhenge4654d22013-11-13 18:29:43 +08005782 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5783 }
5784}
5785
/**
 * Advance an HBitmapIter to an arbitrary offset.
 */
void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
{
    /* The iterator must already be attached to a bitmap. */
    assert(hbi->hb);
    hbitmap_iter_init(hbi, hbi->hb, offset);
}
5794
/* Return the number of dirty sectors tracked by @bitmap. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
5799
/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
5805
5806/* Release a previously grabbed reference to bs.
5807 * If after releasing, reference count is zero, the BlockDriverState is
5808 * deleted. */
5809void bdrv_unref(BlockDriverState *bs)
5810{
Jeff Cody9a4d5ca2014-07-23 17:22:57 -04005811 if (!bs) {
5812 return;
5813 }
Fam Zheng9fcb0252013-08-23 09:14:46 +08005814 assert(bs->refcnt > 0);
5815 if (--bs->refcnt == 0) {
5816 bdrv_delete(bs);
5817 }
5818}
5819
/* One entry in bs->op_blockers[op]: a reason why operation @op is blocked. */
struct BdrvOpBlocker {
    Error *reason;                   /* human-readable blocking reason */
    QLIST_ENTRY(BdrvOpBlocker) list; /* chained per operation type */
};
5824
/* Return true if operation @op is currently blocked on @bs; when blocked
 * and @errp is non-NULL, report the first blocker's reason through it. */
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
        blocker = QLIST_FIRST(&bs->op_blockers[op]);
        if (errp) {
            error_setg(errp, "Node '%s' is busy: %s",
                       bdrv_get_device_or_node_name(bs),
                       error_get_pretty(blocker->reason));
        }
        return true;
    }
    return false;
}
5840
5841void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5842{
5843 BdrvOpBlocker *blocker;
5844 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5845
Markus Armbruster5839e532014-08-19 10:31:08 +02005846 blocker = g_new0(BdrvOpBlocker, 1);
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005847 blocker->reason = reason;
5848 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5849}
5850
5851void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5852{
5853 BdrvOpBlocker *blocker, *next;
5854 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5855 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5856 if (blocker->reason == reason) {
5857 QLIST_REMOVE(blocker, list);
5858 g_free(blocker);
5859 }
5860 }
5861}
5862
5863void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5864{
5865 int i;
5866 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5867 bdrv_op_block(bs, i, reason);
5868 }
5869}
5870
5871void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5872{
5873 int i;
5874 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5875 bdrv_op_unblock(bs, i, reason);
5876 }
5877}
5878
5879bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5880{
5881 int i;
5882
5883 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5884 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5885 return false;
5886 }
5887 }
5888 return true;
5889}
5890
/* Turn on I/O status tracking for @bs and reset its status to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
5896
5897/* The I/O status is only enabled if the drive explicitly
5898 * enables it _and_ the VM is configured to stop on errors */
5899bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5900{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005901 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005902 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5903 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5904 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005905}
5906
/* Turn off I/O status tracking for @bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
5911
5912void bdrv_iostatus_reset(BlockDriverState *bs)
5913{
5914 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005915 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005916 if (bs->job) {
5917 block_job_iostatus_reset(bs->job);
5918 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005919 }
5920}
5921
Luiz Capitulino28a72822011-09-26 17:43:50 -03005922void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5923{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005924 assert(bdrv_iostatus_is_enabled(bs));
5925 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005926 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5927 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005928 }
5929}
5930
/* Create a new disk image.
 *
 * @filename:      name of the image file to create
 * @fmt:           name of the format driver to use (e.g. "qcow2")
 * @base_filename: if non-NULL, backing file to record in the image
 * @base_fmt:      if non-NULL, format of the backing file
 * @options:       "-o"-style creation option string, may be NULL
 * @img_size:      image size; NOTE(review): (uint64_t)-1 appears to act as
 *                 the "not specified" sentinel (see the size == -1 check
 *                 below) — confirm against callers
 * @flags:         BDRV_O_* flags, used when opening the backing file to
 *                 probe its size
 * @errp:          set on failure; the function returns void
 * @quiet:         if true, suppress the "Formatting ..." message
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    /* Both the format and the protocol layer must support creation */
    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* Merge format- and protocol-level option lists; freed at "out" */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* Report the detailed parse error, then return a generic one */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            /* qemu_opt_set fails if the format has no such option */
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file format not supported for file "
                       "format '%s'", fmt);
            goto out;
        }
    }

    /* Re-read the backing file from opts: it may have come from either
     * @base_filename or the -o option string */
    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                       "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt) {
        backing_drv = bdrv_find_format(backing_fmt);
        if (!backing_drv) {
            error_setg(errp, "Unknown backing file format '%s'",
                       backing_fmt);
            goto out;
        }
    }

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            char *full_backing = g_new0(char, PATH_MAX);
            /* NOTE(review): this inner 'size' shadows the outer one; the
             * result is only propagated via qemu_opt_set_number below */
            int64_t size;
            int back_flags;

            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         full_backing, PATH_MAX,
                                                         &local_err);
            if (local_err) {
                g_free(full_backing);
                goto out;
            }

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            bs = NULL;
            ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
                            backing_drv, &local_err);
            g_free(full_backing);
            if (ret < 0) {
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common cleanup: opts/create_opts are NULL-safe to delete here */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01006099
/* Return the AioContext @bs is currently bound to. */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
6104
/* Detach @bs (and, recursively, its file and backing BDSes) from its
 * current AioContext; bs->aio_context is NULL afterwards.
 *
 * Order matters and is deliberately the mirror image of
 * bdrv_attach_aio_context(): user notifiers first, then throttling and
 * the driver hook, then the children. */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf;

    /* Nothing to do for a BDS without a medium/driver */
    if (!bs->drv) {
        return;
    }

    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
        baf->detach_aio_context(baf->opaque);
    }

    if (bs->io_limits_enabled) {
        throttle_detach_aio_context(&bs->throttle_state);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file);
    }
    if (bs->backing_hd) {
        bdrv_detach_aio_context(bs->backing_hd);
    }

    bs->aio_context = NULL;
}
6132
/* Attach @bs (and, recursively, its backing and file BDSes) to
 * @new_context.
 *
 * Performs the reverse sequence of bdrv_detach_aio_context():
 * children first, then the driver hook and throttling, and the user
 * notifiers last. */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *ban;

    /* Nothing to do for a BDS without a medium/driver */
    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing_hd) {
        bdrv_attach_aio_context(bs->backing_hd, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    if (bs->io_limits_enabled) {
        throttle_attach_aio_context(&bs->throttle_state, new_context);
    }

    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
        ban->attached_aio_context(new_context, ban->opaque);
    }
}
6161
/* Move @bs to @new_context: drain all pending requests, detach from the
 * current context, then attach to the new one. */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02006175
Max Reitz33384422014-06-20 21:57:33 +02006176void bdrv_add_aio_context_notifier(BlockDriverState *bs,
6177 void (*attached_aio_context)(AioContext *new_context, void *opaque),
6178 void (*detach_aio_context)(void *opaque), void *opaque)
6179{
6180 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
6181 *ban = (BdrvAioNotifier){
6182 .attached_aio_context = attached_aio_context,
6183 .detach_aio_context = detach_aio_context,
6184 .opaque = opaque
6185 };
6186
6187 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
6188}
6189
6190void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
6191 void (*attached_aio_context)(AioContext *,
6192 void *),
6193 void (*detach_aio_context)(void *),
6194 void *opaque)
6195{
6196 BdrvAioNotifier *ban, *ban_next;
6197
6198 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
6199 if (ban->attached_aio_context == attached_aio_context &&
6200 ban->detach_aio_context == detach_aio_context &&
6201 ban->opaque == opaque)
6202 {
6203 QLIST_REMOVE(ban, list);
6204 g_free(ban);
6205
6206 return;
6207 }
6208 }
6209
6210 abort();
6211}
6212
/* Add @notifier to bs->before_write_notifiers.  The block layer walks
 * this list before acting on write requests (the notifier's return
 * value can abort the request — see NotifierWithReturn). */
void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
{
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}
Max Reitz6f176b42013-09-03 10:09:50 +02006218
Max Reitz77485432014-10-27 11:12:50 +01006219int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
6220 BlockDriverAmendStatusCB *status_cb)
Max Reitz6f176b42013-09-03 10:09:50 +02006221{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08006222 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02006223 return -ENOTSUP;
6224 }
Max Reitz77485432014-10-27 11:12:50 +01006225 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
Max Reitz6f176b42013-09-03 10:09:50 +02006226}
Benoît Canetf6186f42013-10-02 14:33:48 +02006227
Benoît Canetb5042a32014-03-03 19:11:34 +01006228/* This function will be called by the bdrv_recurse_is_first_non_filter method
6229 * of block filter and by bdrv_is_first_non_filter.
6230 * It is used to test if the given bs is the candidate or recurse more in the
6231 * node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01006232 */
Benoît Canet212a5a82014-01-23 21:31:36 +01006233bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
6234 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02006235{
Benoît Canetb5042a32014-03-03 19:11:34 +01006236 /* return false if basic checks fails */
6237 if (!bs || !bs->drv) {
6238 return false;
6239 }
6240
6241 /* the code reached a non block filter driver -> check if the bs is
6242 * the same as the candidate. It's the recursion termination condition.
6243 */
6244 if (!bs->drv->is_filter) {
6245 return bs == candidate;
6246 }
6247 /* Down this path the driver is a block filter driver */
6248
6249 /* If the block filter recursion method is defined use it to recurse down
6250 * the node graph.
6251 */
6252 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01006253 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
6254 }
6255
Benoît Canetb5042a32014-03-03 19:11:34 +01006256 /* the driver is a block filter but don't allow to recurse -> return false
6257 */
6258 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01006259}
6260
6261/* This function checks if the candidate is the first non filter bs down it's
6262 * bs chain. Since we don't have pointers to parents it explore all bs chains
6263 * from the top. Some filters can choose not to pass down the recursion.
6264 */
6265bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6266{
6267 BlockDriverState *bs;
6268
6269 /* walk down the bs forest recursively */
6270 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6271 bool perm;
6272
Benoît Canetb5042a32014-03-03 19:11:34 +01006273 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01006274 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01006275
6276 /* candidate is the first non filter */
6277 if (perm) {
6278 return true;
6279 }
6280 }
6281
6282 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02006283}
Benoît Canet09158f02014-06-27 18:25:25 +02006284
/* Look up the node named @node_name and validate that it may be used as
 * a replacement target.
 *
 * Returns the BlockDriverState on success.  Returns NULL (with @errp
 * set) if the node does not exist, is blocked for
 * BLOCK_OP_TYPE_REPLACE, or is not the top-most non-filter of its
 * chain.
 *
 * NOTE(review): the node's AioContext is acquired and released within
 * this function only; the returned pointer is handed back without
 * holding it. */
BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
{
    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
    AioContext *aio_context;

    if (!to_replace_bs) {
        error_setg(errp, "Node name '%s' not found", node_name);
        return NULL;
    }

    aio_context = bdrv_get_aio_context(to_replace_bs);
    aio_context_acquire(aio_context);

    /* bdrv_op_is_blocked() sets @errp itself when blocked */
    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
        to_replace_bs = NULL;
        goto out;
    }

    /* We don't want arbitrary node of the BDS chain to be replaced only the top
     * most non filter in order to prevent data corruption.
     * Another benefit is that this tests exclude backing files which are
     * blocked by the backing blockers.
     */
    if (!bdrv_is_first_non_filter(to_replace_bs)) {
        error_setg(errp, "Only top most non filter can be replaced");
        to_replace_bs = NULL;
        goto out;
    }

out:
    aio_context_release(aio_context);
    return to_replace_bs;
}
Ming Lei448ad912014-07-04 18:04:33 +08006318
6319void bdrv_io_plug(BlockDriverState *bs)
6320{
6321 BlockDriver *drv = bs->drv;
6322 if (drv && drv->bdrv_io_plug) {
6323 drv->bdrv_io_plug(bs);
6324 } else if (bs->file) {
6325 bdrv_io_plug(bs->file);
6326 }
6327}
6328
6329void bdrv_io_unplug(BlockDriverState *bs)
6330{
6331 BlockDriver *drv = bs->drv;
6332 if (drv && drv->bdrv_io_unplug) {
6333 drv->bdrv_io_unplug(bs);
6334 } else if (bs->file) {
6335 bdrv_io_unplug(bs->file);
6336 }
6337}
6338
6339void bdrv_flush_io_queue(BlockDriverState *bs)
6340{
6341 BlockDriver *drv = bs->drv;
6342 if (drv && drv->bdrv_flush_io_queue) {
6343 drv->bdrv_flush_io_queue(bs);
6344 } else if (bs->file) {
6345 bdrv_flush_io_queue(bs->file);
6346 }
6347}
Max Reitz91af7012014-07-18 20:24:56 +02006348
6349static bool append_open_options(QDict *d, BlockDriverState *bs)
6350{
6351 const QDictEntry *entry;
6352 bool found_any = false;
6353
6354 for (entry = qdict_first(bs->options); entry;
6355 entry = qdict_next(bs->options, entry))
6356 {
6357 /* Only take options for this level and exclude all non-driver-specific
6358 * options */
6359 if (!strchr(qdict_entry_key(entry), '.') &&
6360 strcmp(qdict_entry_key(entry), "node-name"))
6361 {
6362 qobject_incref(qdict_entry_value(entry));
6363 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
6364 found_any = true;
6365 }
6366 }
6367
6368 return found_any;
6369}
6370
/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
 */
void bdrv_refresh_filename(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    QDict *opts;

    /* No medium/driver: nothing to refresh */
    if (!drv) {
        return;
    }

    /* This BDS's file name will most probably depend on its file's name, so
     * refresh that first */
    if (bs->file) {
        bdrv_refresh_filename(bs->file);
    }

    if (drv->bdrv_refresh_filename) {
        /* Obsolete information is of no use here, so drop the old file name
         * information before refreshing it */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        drv->bdrv_refresh_filename(bs);
    } else if (bs->file) {
        /* Try to reconstruct valid information from the underlying file */
        bool has_open_options;

        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        opts = qdict_new();
        has_open_options = append_open_options(opts, bs);

        /* If no specific options have been given for this BDS, the filename of
         * the underlying file should suffice for this one as well */
        if (bs->file->exact_filename[0] && !has_open_options) {
            strcpy(bs->exact_filename, bs->file->exact_filename);
        }
        /* Reconstructing the full options QDict is simple for most format block
         * drivers, as long as the full options are known for the underlying
         * file BDS. The full options QDict of that file BDS should somehow
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->full_open_options) {
            qdict_put_obj(opts, "driver",
                          QOBJECT(qstring_from_str(drv->format_name)));
            /* INCREF: "file" in opts now co-owns the child's options dict */
            QINCREF(bs->file->full_open_options);
            qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));

            bs->full_open_options = opts;
        } else {
            /* Child options unknown -> ours cannot be reconstructed either */
            QDECREF(opts);
        }
    } else if (!bs->full_open_options && qdict_size(bs->options)) {
        /* There is no underlying file BDS (at least referenced by BDS.file),
         * so the full options QDict should be equal to the options given
         * specifically for this block device when it was opened (plus the
         * driver specification).
         * Because those options don't change, there is no need to update
         * full_open_options when it's already set. */

        opts = qdict_new();
        append_open_options(opts, bs);
        qdict_put_obj(opts, "driver",
                      QOBJECT(qstring_from_str(drv->format_name)));

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
             * require this filename to be parsed), but we have to find some
             * default solution here, so just include it. If some block driver
             * does not support pure options without any filename at all or
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
            qdict_put_obj(opts, "filename",
                          QOBJECT(qstring_from_str(bs->exact_filename)));
        }

        bs->full_open_options = opts;
    }

    /* Finally derive bs->filename from whichever representation we have */
    if (bs->exact_filename[0]) {
        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
    } else if (bs->full_open_options) {
        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
                 qstring_get_str(json));
        QDECREF(json);
    }
}
Benoît Canet5366d0c2014-09-05 15:46:18 +02006478
6479/* This accessor function purpose is to allow the device models to access the
6480 * BlockAcctStats structure embedded inside a BlockDriverState without being
6481 * aware of the BlockDriverState structure layout.
6482 * It will go away when the BlockAcctStats structure will be moved inside
6483 * the device models.
6484 */
6485BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6486{
6487 return &bs->stats;
6488}