/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/sysemu.h"
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

struct BdrvDirtyBitmap {
    HBitmap *bitmap;
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BdrvRequestFlags flags,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

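/* Examples for the Windows drive helpers above (illustrative): "c:" is both a
 * drive prefix and, being just two characters, a drive; "\\.\PhysicalDrive0"
 * and "//./d:" also count as drives; "c:\dir\img.qcow2" has a drive prefix but
 * is not itself a drive. */
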
/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

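/* Illustrative sketch (not code from this file): a caller that wants to cap a
 * device at 10 MB/s of total bandwidth would enable throttling and then hand
 * a ThrottleConfig (from qemu/throttle.h) to bdrv_set_io_limits(), roughly:
 *
 *     ThrottleConfig cfg = {
 *         .buckets[THROTTLE_BPS_TOTAL].avg = 10 * 1024 * 1024,
 *     };
 *     bdrv_io_limits_enable(bs);
 *     bdrv_set_io_limits(bs, &cfg);
 *
 * The 10 MB/s figure is made up; see bdrv_io_limits_enable() below for the
 * required call order.
 */
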
/* this function drains all the throttled I/Os */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}

static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}

/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

/* This function makes an I/O wait if needed
 *
 * @bytes:    the number of bytes of the I/O
 * @is_write: is the I/O a write
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* does this I/O have to wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already throttled,
     * queue the I/O */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the I/O will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);

    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue the next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}

size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}

/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

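/* Worked example for path_combine() above (illustrative values): with
 * base_path = "/img/base.qcow2" and filename = "snap.qcow2" the result is
 * "/img/snap.qcow2"; an absolute filename is copied through unchanged; and a
 * URL base such as "http://host/dir/base.qcow2" keeps its directory part,
 * yielding "http://host/dir/snap.qcow2".
 */
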
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
        pstrcpy(dest, sz, bs->backing_file);
    } else {
        path_combine(dest, sz, bs->filename, bs->backing_file);
    }
}

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

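/* How block drivers typically reach bdrv_register() (a sketch of the usual
 * pattern in the block/ drivers, not code defined in this file):
 *
 *     static BlockDriver bdrv_mydrv = {
 *         .format_name = "mydrv",
 *         ...
 *     };
 *
 *     static void bdrv_mydrv_init(void)
 *     {
 *         bdrv_register(&bdrv_mydrv);
 *     }
 *     block_init(bdrv_mydrv_init);
 *
 * block_init() comes from "qemu/module.h", included above; "mydrv" is a
 * made-up name.
 */
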
/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name, Error **errp)
{
    BlockDriverState *bs;
    int i;

    if (bdrv_find(device_name)) {
        error_setg(errp, "Device with id '%s' already exists",
                   device_name);
        return NULL;
    }
    if (bdrv_find_node(device_name)) {
        error_setg(errp, "Device with node-name '%s' already exists",
                   device_name);
        return NULL;
    }

    bs = g_malloc0(sizeof(BlockDriverState));
    QLIST_INIT(&bs->dirty_bitmaps);
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    }
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();

    return bs;
}

void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QemuOpts *opts;
    int ret;
    Error *err;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}

int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}

int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

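/* Sketch of a typical caller (names and sizes are illustrative only; the same
 * pattern appears in bdrv_append_temp_snapshot() further down):
 *
 *     BlockDriver *drv = bdrv_find_format("qcow2");
 *     QemuOpts *opts = qemu_opts_create(drv->create_opts, NULL, 0, &error_abort);
 *     qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 1024 * 1024 * 1024);
 *     ret = bdrv_create(drv, "/tmp/test.qcow2", opts, &err);
 *     qemu_opts_del(opts);
 */
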
int bdrv_refresh_limits(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return 0;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file);
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd);
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        return drv->bdrv_refresh_limits(bs);
    }

    return 0;
}

/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}

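/* Typical use (sketch, mirroring the caller further down in this file):
 *
 *     char *tmp = g_malloc0(PATH_MAX + 1);
 *     if (get_tmp_filename(tmp, PATH_MAX + 1) < 0) {
 *         ... handle the error ...
 *     }
 *
 * On success the buffer holds the name of an empty file such as
 * "/var/tmp/vl.ab12cd" that the caller must eventually unlink() and g_free().
 * See bdrv_append_temp_snapshot() below for a real caller.
 */
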
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return bdrv_find_format("file");
    }

    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            error_setg(errp, "Could not find raw image format");
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

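/* For quick reference, the mapping implemented above (derived from the code,
 * not an external spec):
 *
 *     "off" / "none"   -> BDRV_O_NOCACHE | BDRV_O_CACHE_WB
 *     "directsync"     -> BDRV_O_NOCACHE
 *     "writeback"      -> BDRV_O_CACHE_WB
 *     "unsafe"         -> BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH
 *     "writethrough"   -> no cache flags (the default)
 *
 * e.g. bdrv_parse_cache_flags("unsafe", &flags) sets both bits and returns 0,
 * while an unknown mode string returns -1 and leaves only the mask cleared.
 */
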
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

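/* Usage sketch: because the flag is a counter, independent users compose
 * safely without coordinating with each other:
 *
 *     bdrv_enable_copy_on_read(bs);     // user A (e.g. a block job)
 *     bdrv_enable_copy_on_read(bs);     // user B (e.g. copy-on-read=on)
 *     ...
 *     bdrv_disable_copy_on_read(bs);    // A done, COR still on (count == 1)
 *     bdrv_disable_copy_on_read(bs);    // B done, COR actually disabled
 */
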
/*
 * Returns the flags that a temporary snapshot should get, based on the
 * originally requested flags (the originally requested image will have flags
 * like a backing file)
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
}

/*
 * Returns the flags that bs->file should get, based on the given flags for
 * the parent BDS
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);

    return flags;
}

/*
 * Returns the flags that bs->backing_hd should get, based on the given flags
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);

    return flags;
}

static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* an empty string node name is invalid */
    if (node_name[0] == '\0') {
        error_setg(errp, "Empty node name");
        return;
    }

    /* takes care of avoiding namespace collisions */
    if (bdrv_find(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}

/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs);
    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 */
static int bdrv_fill_options(QDict **options, const char *filename,
                             Error **errp)
{
    const char *drvname;
    bool parse_filename = false;
    Error *local_err = NULL;
    BlockDriver *drv;

    /* Fetch the file name from the options QDict if necessary */
    if (filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (!drvname) {
        if (filename) {
            drv = bdrv_find_protocol(filename, parse_filename);
            if (!drv) {
                error_setg(errp, "Unknown protocol");
                return -EINVAL;
            }

            drvname = drv->format_name;
            qdict_put(*options, "driver", qstring_from_str(drvname));
        } else {
            error_setg(errp, "Must specify either driver or file");
            return -EINVAL;
        }
    }

    drv = bdrv_find_format(drvname);
    if (!drv) {
        error_setg(errp, "Unknown driver '%s'", drvname);
        return -ENOENT;
    }

    /* Driver-specific filename parsing */
    if (drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}

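/* Example of the conversion performed above (illustrative): calling
 * bdrv_fill_options(&options, "/tmp/disk.img", &err) with an empty *options
 * leaves
 *
 *     { "filename": "/tmp/disk.img", "driver": "file" }
 *
 * because bdrv_find_protocol() falls back to the "file" driver when the name
 * has no "<protocol>:" prefix; a name like "nbd://host:10809/export" would
 * instead select the driver whose protocol_name matches "nbd".
 */
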
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 *
 * options is an indirect pointer to a QDict of options to pass to the block
 * drivers, or pointer to NULL for an empty set of options. If this function
 * takes ownership of the QDict reference, it will set *options to NULL;
 * otherwise, it will contain unused/unrecognized options after this function
 * returns. Then, the caller is responsible for freeing it. If it intends to
 * reuse the QDict, QINCREF() should be called beforehand.
 */
static int bdrv_file_open(BlockDriverState *bs, const char *filename,
                          QDict **options, int flags, Error **errp)
{
    BlockDriver *drv;
    const char *drvname;
    Error *local_err = NULL;
    int ret;

    ret = bdrv_fill_options(options, filename, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto fail;
    }

    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_str(*options, "driver");

    drv = bdrv_find_format(drvname);
    assert(drv);
    qdict_del(*options, "driver");

    /* Open the file */
    if (!drv->bdrv_file_open) {
        ret = bdrv_open(&bs, filename, NULL, *options, flags, drv, &local_err);
        *options = NULL;
    } else {
        ret = bdrv_open_common(bs, NULL, *options, flags, drv, &local_err);
    }
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto fail;
    }

    bs->growable = 1;
    return 0;

fail:
    return ret;
}

void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{
    if (bs->backing_hd) {
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bs->device_name);
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs);
}

/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriver *back_drv = NULL;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
    }

    backing_hd = bdrv_new("", errp);

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    assert(bs->backing_hd == NULL);
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}

/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
 *
 * If allow_none is true, no image will be opened if filename is NULL and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * The BlockdevRef will be removed from the options QDict.
 *
 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
 */
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                    QDict *options, const char *bdref_key, int flags,
                    bool allow_none, Error **errp)
{
    QDict *image_options;
    int ret;
    char *bdref_key_dot;
    const char *reference;

    assert(pbs);
    assert(*pbs == NULL);

    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
    g_free(bdref_key_dot);

    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
        if (allow_none) {
            ret = 0;
        } else {
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
            ret = -EINVAL;
        }
        QDECREF(image_options);
        goto done;
    }

    ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);

done:
    qdict_del(options, bdref_key);
    return ret;
}

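/* Concrete illustration of the contract above (made-up values): with
 * bdref_key = "backing" and a flattened options QDict containing
 *
 *     "backing.driver" = "qcow2", "backing.file.filename" = "base.qcow2"
 *
 * the "backing." entries are split off and opened as their own
 * BlockDriverState, while a plain "backing" = "<node-name>" entry instead
 * reuses an existing node by reference.
 */
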
void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
{
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char *tmp_filename = g_malloc0(PATH_MAX + 1);
    int64_t total_size;
    BlockDriver *bdrv_qcow2;
    QemuOpts *opts = NULL;
    QDict *snapshot_options;
    BlockDriverState *bs_snapshot;
    Error *local_err;
    int ret;

    /* if snapshot, we create a temporary backing file and open it
       instead of opening 'filename' directly */

    /* Get the required size from the image */
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
        error_setg_errno(errp, -total_size, "Could not get image size");
        goto out;
    }
    total_size &= BDRV_SECTOR_MASK;

    /* Create the temporary image */
    ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not get temporary filename");
        goto out;
    }

    bdrv_qcow2 = bdrv_find_format("qcow2");
    opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
                            &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
    ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
    qemu_opts_del(opts);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not create temporary overlay "
                         "'%s': %s", tmp_filename,
                         error_get_pretty(local_err));
        error_free(local_err);
        goto out;
    }

    /* Prepare a new options QDict for the temporary file */
    snapshot_options = qdict_new();
    qdict_put(snapshot_options, "file.driver",
              qstring_from_str("file"));
    qdict_put(snapshot_options, "file.filename",
              qstring_from_str(tmp_filename));

    bs_snapshot = bdrv_new("", &error_abort);

    ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
                    flags, bdrv_qcow2, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto out;
    }

    bdrv_append(bs_snapshot, bs);

out:
    g_free(tmp_filename);
}

Max Reitz4993f7e2014-05-08 20:12:41 +02001335static QDict *parse_json_filename(const char *filename, Error **errp)
1336{
1337 QObject *options_obj;
1338 QDict *options;
1339 int ret;
1340
1341 ret = strstart(filename, "json:", &filename);
1342 assert(ret);
1343
1344 options_obj = qobject_from_json(filename);
1345 if (!options_obj) {
1346 error_setg(errp, "Could not parse the JSON options");
1347 return NULL;
1348 }
1349
1350 if (qobject_type(options_obj) != QTYPE_QDICT) {
1351 qobject_decref(options_obj);
1352 error_setg(errp, "Invalid JSON object given");
1353 return NULL;
1354 }
1355
1356 options = qobject_to_qdict(options_obj);
1357 qdict_flatten(options);
1358
1359 return options;
1360}
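
/*
 * For example (sketch), a filename such as
 *
 *     json:{"driver": "qcow2",
 *           "file": {"driver": "file", "filename": "test.qcow2"}}
 *
 * is parsed and flattened into a QDict equivalent to
 *
 *     driver         -> "qcow2"
 *     file.driver    -> "file"
 *     file.filename  -> "test.qcow2"
 *
 * bdrv_open() then merges these entries into the caller's options with
 * lower priority than options that were specified directly.
 */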
1361
Max Reitzda557aa2013-12-20 19:28:11 +01001362/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001363 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001364 *
1365 * options is a QDict of options to pass to the block drivers, or NULL for an
1366 * empty set of options. The reference to the QDict belongs to the block layer
1367 * after the call (even on failure), so if the caller intends to reuse the
1368 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001369 *
1370 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1371 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001372 *
1373 * The reference parameter may be used to specify an existing block device which
1374 * should be opened. If specified, neither options nor a filename may be given,
1375 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001376 */
Max Reitzddf56362014-02-18 18:33:06 +01001377int bdrv_open(BlockDriverState **pbs, const char *filename,
1378 const char *reference, QDict *options, int flags,
1379 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001380{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001381 int ret;
Max Reitzf67503e2014-02-18 18:33:05 +01001382 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001383 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001384 Error *local_err = NULL;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001385 int snapshot_flags = 0;
bellard712e7872005-04-28 21:09:32 +00001386
Max Reitzf67503e2014-02-18 18:33:05 +01001387 assert(pbs);
1388
Max Reitzddf56362014-02-18 18:33:06 +01001389 if (reference) {
1390 bool options_non_empty = options ? qdict_size(options) : false;
1391 QDECREF(options);
1392
1393 if (*pbs) {
1394 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1395 "another block device");
1396 return -EINVAL;
1397 }
1398
1399 if (filename || options_non_empty) {
1400 error_setg(errp, "Cannot reference an existing block device with "
1401 "additional options or a new filename");
1402 return -EINVAL;
1403 }
1404
1405 bs = bdrv_lookup_bs(reference, reference, errp);
1406 if (!bs) {
1407 return -ENODEV;
1408 }
1409 bdrv_ref(bs);
1410 *pbs = bs;
1411 return 0;
1412 }
1413
Max Reitzf67503e2014-02-18 18:33:05 +01001414 if (*pbs) {
1415 bs = *pbs;
1416 } else {
Kevin Wolf98522f62014-04-17 13:16:01 +02001417 bs = bdrv_new("", &error_abort);
Max Reitzf67503e2014-02-18 18:33:05 +01001418 }
1419
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001420 /* NULL means an empty set of options */
1421 if (options == NULL) {
1422 options = qdict_new();
1423 }
1424
Max Reitz4993f7e2014-05-08 20:12:41 +02001425 if (filename && g_str_has_prefix(filename, "json:")) {
1426 QDict *json_options = parse_json_filename(filename, &local_err);
1427 if (local_err) {
1428 ret = -EINVAL;
1429 goto fail;
1430 }
1431
1432 /* Options given in the filename have lower priority than options
1433 * specified directly */
1434 qdict_join(options, json_options, false);
1435 QDECREF(json_options);
1436 filename = NULL;
1437 }
1438
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001439 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001440 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001441
Max Reitz5469a2a2014-02-18 18:33:10 +01001442 if (flags & BDRV_O_PROTOCOL) {
1443 assert(!drv);
Max Reitz5acd9d82014-02-18 18:33:11 +01001444 ret = bdrv_file_open(bs, filename, &options, flags & ~BDRV_O_PROTOCOL,
Max Reitz5469a2a2014-02-18 18:33:10 +01001445 &local_err);
Max Reitz5469a2a2014-02-18 18:33:10 +01001446 if (!ret) {
Kevin Wolfeb909c72014-03-06 16:34:46 +01001447 drv = bs->drv;
Max Reitz5acd9d82014-02-18 18:33:11 +01001448 goto done;
Max Reitz5469a2a2014-02-18 18:33:10 +01001449 } else if (bs->drv) {
1450 goto close_and_fail;
1451 } else {
1452 goto fail;
1453 }
1454 }
1455
Kevin Wolff500a6d2012-11-12 17:35:27 +01001456 /* Open image file without format layer */
Jeff Codybe028ad2012-09-20 15:13:17 -04001457 if (flags & BDRV_O_RDWR) {
1458 flags |= BDRV_O_ALLOW_RDWR;
1459 }
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001460 if (flags & BDRV_O_SNAPSHOT) {
1461 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1462 flags = bdrv_backing_flags(flags);
1463 }
Jeff Codybe028ad2012-09-20 15:13:17 -04001464
Max Reitzf67503e2014-02-18 18:33:05 +01001465 assert(file == NULL);
Max Reitz054963f2013-12-20 19:28:12 +01001466 ret = bdrv_open_image(&file, filename, options, "file",
Kevin Wolf0b50cc82014-04-11 21:29:52 +02001467 bdrv_inherited_flags(flags),
1468 true, &local_err);
Max Reitz054963f2013-12-20 19:28:12 +01001469 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001470 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001471 }
1472
1473 /* Find the right image format driver */
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001474 drvname = qdict_get_try_str(options, "driver");
1475 if (drvname) {
Kevin Wolf8f94a6e2013-10-10 11:45:55 +02001476 drv = bdrv_find_format(drvname);
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001477 qdict_del(options, "driver");
Kevin Wolf06d22aa2013-08-08 17:44:52 +02001478 if (!drv) {
1479 error_setg(errp, "Invalid driver: '%s'", drvname);
1480 ret = -EINVAL;
Kevin Wolf8bfea152014-04-11 19:16:36 +02001481 goto fail;
Kevin Wolf06d22aa2013-08-08 17:44:52 +02001482 }
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001483 }
1484
Kevin Wolff500a6d2012-11-12 17:35:27 +01001485 if (!drv) {
Max Reitz2a05cbe2013-12-20 19:28:10 +01001486 if (file) {
1487 ret = find_image_format(file, filename, &drv, &local_err);
1488 } else {
1489 error_setg(errp, "Must specify either driver or file");
1490 ret = -EINVAL;
Kevin Wolf8bfea152014-04-11 19:16:36 +02001491 goto fail;
Max Reitz2a05cbe2013-12-20 19:28:10 +01001492 }
Kevin Wolff500a6d2012-11-12 17:35:27 +01001493 }
1494
1495 if (!drv) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001496 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001497 }
1498
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001499 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001500 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001501 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001502 goto fail;
Christoph Hellwig69873072010-01-20 18:13:25 +01001503 }
1504
Max Reitz2a05cbe2013-12-20 19:28:10 +01001505 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001506 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001507 file = NULL;
1508 }
1509
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001510 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001511 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001512 QDict *backing_options;
1513
Benoît Canet5726d872013-09-25 13:30:01 +02001514 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001515 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001516 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001517 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001518 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001519 }
1520
Kevin Wolfb9988752014-04-03 12:09:34 +02001521 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1522 * temporary snapshot afterwards. */
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001523 if (snapshot_flags) {
1524 bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001525 if (local_err) {
1526 error_propagate(errp, local_err);
1527 goto close_and_fail;
1528 }
1529 }
1530
1531
Max Reitz5acd9d82014-02-18 18:33:11 +01001532done:
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001533 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001534 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001535 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001536 if (flags & BDRV_O_PROTOCOL) {
1537 error_setg(errp, "Block protocol '%s' doesn't support the option "
1538 "'%s'", drv->format_name, entry->key);
1539 } else {
1540 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1541 "support the option '%s'", drv->format_name,
1542 bs->device_name, entry->key);
1543 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001544
1545 ret = -EINVAL;
1546 goto close_and_fail;
1547 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001548
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001549 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001550 bdrv_dev_change_media_cb(bs, true);
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001551 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1552 && !runstate_check(RUN_STATE_INMIGRATE)
1553 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1554 error_setg(errp,
1555 "Guest must be stopped for opening of encrypted image");
1556 ret = -EBUSY;
1557 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001558 }
1559
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001560 QDECREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001561 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001562 return 0;
1563
Kevin Wolf8bfea152014-04-11 19:16:36 +02001564fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001565 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001566 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001567 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001568 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001569 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001570 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001571 if (!*pbs) {
1572 /* If *pbs is NULL, a new BDS has been created in this function and
1573 needs to be freed now. Otherwise, it does not need to be closed,
1574 since it has not really been opened yet. */
1575 bdrv_unref(bs);
1576 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001577 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001578 error_propagate(errp, local_err);
1579 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001580 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001581
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001582close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001583 /* See fail path, but now the BDS has to be always closed */
1584 if (*pbs) {
1585 bdrv_close(bs);
1586 } else {
1587 bdrv_unref(bs);
1588 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001589 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001590 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001591 error_propagate(errp, local_err);
1592 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001593 return ret;
1594}
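
/*
 * Example (minimal sketch, not a real caller; the image name, flag
 * combination and error handling are placeholders):
 *
 *     BlockDriverState *bs = NULL;
 *     QDict *opts = qdict_new();
 *     Error *local_err = NULL;
 *     int ret;
 *
 *     qdict_put(opts, "driver", qstring_from_str("qcow2"));
 *     ret = bdrv_open(&bs, "disk.qcow2", NULL, opts,
 *                     BDRV_O_RDWR | BDRV_O_CACHE_WB, NULL, &local_err);
 *     if (ret < 0) {
 *         error_report("%s", error_get_pretty(local_err));
 *         error_free(local_err);
 *     }
 *
 * Note that 'opts' belongs to the block layer after the call even on
 * failure, so the caller must not QDECREF() it; the BDS is released again
 * with bdrv_unref(bs).
 */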
1595
Jeff Codye971aa12012-09-20 15:13:19 -04001596typedef struct BlockReopenQueueEntry {
1597 bool prepared;
1598 BDRVReopenState state;
1599 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1600} BlockReopenQueueEntry;
1601
1602/*
1603 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1604 * reopen of multiple devices.
1605 *
1606 * bs_queue can either be an existing BlockReopenQueue that has had
1607 * QSIMPLEQ_INIT already performed, or alternatively may be NULL, in which case
1608 * a new BlockReopenQueue will be created and initialized. It should then be
1609 * passed back in for subsequent calls that are intended to be of the same
1610 * atomic 'set'.
1611 *
1612 * bs is the BlockDriverState to add to the reopen queue.
1613 *
1614 * flags contains the open flags for the associated bs
1615 *
1616 * returns a pointer to bs_queue, which is either the newly allocated
1617 * bs_queue, or the existing bs_queue being used.
1618 *
1619 */
1620BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1621 BlockDriverState *bs, int flags)
1622{
1623 assert(bs != NULL);
1624
1625 BlockReopenQueueEntry *bs_entry;
1626 if (bs_queue == NULL) {
1627 bs_queue = g_new0(BlockReopenQueue, 1);
1628 QSIMPLEQ_INIT(bs_queue);
1629 }
1630
Kevin Wolff1f25a22014-04-25 19:04:55 +02001631 /* bdrv_open() masks this flag out */
1632 flags &= ~BDRV_O_PROTOCOL;
1633
Jeff Codye971aa12012-09-20 15:13:19 -04001634 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001635 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001636 }
1637
1638 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1639 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1640
1641 bs_entry->state.bs = bs;
1642 bs_entry->state.flags = flags;
1643
1644 return bs_queue;
1645}
1646
1647/*
1648 * Reopen multiple BlockDriverStates atomically & transactionally.
1649 *
1650 * The queue passed in (bs_queue) must have been built up previously
1651 * via bdrv_reopen_queue().
1652 *
1653 * Reopens all BDS specified in the queue, with the appropriate
1654 * flags. All devices are prepared for reopen, and failure of any
1655 * device will cause all device changes to be abandonded, and intermediate
1656 * data cleaned up.
1657 *
1658 * If all devices prepare successfully, then the changes are committed
1659 * to all devices.
1660 *
1661 */
1662int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1663{
1664 int ret = -1;
1665 BlockReopenQueueEntry *bs_entry, *next;
1666 Error *local_err = NULL;
1667
1668 assert(bs_queue != NULL);
1669
1670 bdrv_drain_all();
1671
1672 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1673 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1674 error_propagate(errp, local_err);
1675 goto cleanup;
1676 }
1677 bs_entry->prepared = true;
1678 }
1679
1680 /* If we reach this point, we have success and just need to apply the
1681 * changes
1682 */
1683 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1684 bdrv_reopen_commit(&bs_entry->state);
1685 }
1686
1687 ret = 0;
1688
1689cleanup:
1690 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1691 if (ret && bs_entry->prepared) {
1692 bdrv_reopen_abort(&bs_entry->state);
1693 }
1694 g_free(bs_entry);
1695 }
1696 g_free(bs_queue);
1697 return ret;
1698}
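
/*
 * Typical usage pattern (sketch; bs_a and bs_b stand for any two
 * BlockDriverStates the caller wants to switch atomically):
 *
 *     BlockReopenQueue *queue = NULL;
 *     Error *local_err = NULL;
 *     int ret;
 *
 *     queue = bdrv_reopen_queue(queue, bs_a, bs_a->open_flags | BDRV_O_RDWR);
 *     queue = bdrv_reopen_queue(queue, bs_b, bs_b->open_flags | BDRV_O_RDWR);
 *     ret = bdrv_reopen_multiple(queue, &local_err);
 *
 * bdrv_reopen_multiple() frees the queue itself, so the caller only has to
 * deal with 'local_err' if ret is negative.
 */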
1699
1700
1701/* Reopen a single BlockDriverState with the specified flags. */
1702int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1703{
1704 int ret = -1;
1705 Error *local_err = NULL;
1706 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1707
1708 ret = bdrv_reopen_multiple(queue, &local_err);
1709 if (local_err != NULL) {
1710 error_propagate(errp, local_err);
1711 }
1712 return ret;
1713}
1714
1715
1716/*
1717 * Prepares a BlockDriverState for reopen. All changes are staged in the
1718 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1719 * the block driver layer's .bdrv_reopen_prepare() callback.
1720 *
1721 * bs is the BlockDriverState to reopen
1722 * flags are the new open flags
1723 * queue is the reopen queue
1724 *
1725 * Returns 0 on success, non-zero on error. On error errp will be set
1726 * as well.
1727 *
1728 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1729 * It is the responsibility of the caller to then call bdrv_reopen_abort() or
1730 * bdrv_reopen_commit() for any other BDS that have been left in a prepare() state.
1731 *
1732 */
1733int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1734 Error **errp)
1735{
1736 int ret = -1;
1737 Error *local_err = NULL;
1738 BlockDriver *drv;
1739
1740 assert(reopen_state != NULL);
1741 assert(reopen_state->bs->drv != NULL);
1742 drv = reopen_state->bs->drv;
1743
1744 /* if we are to stay read-only, do not allow permission change
1745 * to r/w */
1746 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1747 reopen_state->flags & BDRV_O_RDWR) {
1748 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1749 reopen_state->bs->device_name);
1750 goto error;
1751 }
1752
1753
1754 ret = bdrv_flush(reopen_state->bs);
1755 if (ret) {
1756 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1757 strerror(-ret));
1758 goto error;
1759 }
1760
1761 if (drv->bdrv_reopen_prepare) {
1762 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1763 if (ret) {
1764 if (local_err != NULL) {
1765 error_propagate(errp, local_err);
1766 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001767 error_setg(errp, "failed while preparing to reopen image '%s'",
1768 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001769 }
1770 goto error;
1771 }
1772 } else {
1773 /* It is currently mandatory to have a bdrv_reopen_prepare()
1774 * handler for each supported drv. */
1775 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1776 drv->format_name, reopen_state->bs->device_name,
1777 "reopening of file");
1778 ret = -1;
1779 goto error;
1780 }
1781
1782 ret = 0;
1783
1784error:
1785 return ret;
1786}
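
/*
 * A driver's prepare callback typically just stages whatever it will need in
 * commit/abort. Hypothetical sketch for a driver "foo" (the struct and its
 * field are made up for illustration, not taken from a real driver):
 *
 *     typedef struct BDRVFooReopenState {
 *         int new_open_flags;
 *     } BDRVFooReopenState;
 *
 *     static int foo_reopen_prepare(BDRVReopenState *state,
 *                                   BlockReopenQueue *queue, Error **errp)
 *     {
 *         BDRVFooReopenState *rs = g_new0(BDRVFooReopenState, 1);
 *         rs->new_open_flags = state->flags;
 *         state->opaque = rs;
 *         return 0;
 *     }
 *
 * The matching .bdrv_reopen_commit() applies the staged state and frees
 * state->opaque; .bdrv_reopen_abort() only frees it.
 */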
1787
1788/*
1789 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1790 * makes them final by swapping the staging BlockDriverState contents into
1791 * the active BlockDriverState contents.
1792 */
1793void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1794{
1795 BlockDriver *drv;
1796
1797 assert(reopen_state != NULL);
1798 drv = reopen_state->bs->drv;
1799 assert(drv != NULL);
1800
1801 /* If there are any driver level actions to take */
1802 if (drv->bdrv_reopen_commit) {
1803 drv->bdrv_reopen_commit(reopen_state);
1804 }
1805
1806 /* set BDS specific flags now */
1807 reopen_state->bs->open_flags = reopen_state->flags;
1808 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1809 BDRV_O_CACHE_WB);
1810 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001811
1812 bdrv_refresh_limits(reopen_state->bs);
Jeff Codye971aa12012-09-20 15:13:19 -04001813}
1814
1815/*
1816 * Abort the reopen, and delete and free the staged changes in
1817 * reopen_state
1818 */
1819void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1820{
1821 BlockDriver *drv;
1822
1823 assert(reopen_state != NULL);
1824 drv = reopen_state->bs->drv;
1825 assert(drv != NULL);
1826
1827 if (drv->bdrv_reopen_abort) {
1828 drv->bdrv_reopen_abort(reopen_state);
1829 }
1830}
1831
1832
bellardfc01f7e2003-06-30 10:03:06 +00001833void bdrv_close(BlockDriverState *bs)
1834{
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001835 if (bs->job) {
1836 block_job_cancel_sync(bs->job);
1837 }
Stefan Hajnoczi58fda172013-07-02 15:36:25 +02001838 bdrv_drain_all(); /* complete I/O */
1839 bdrv_flush(bs);
1840 bdrv_drain_all(); /* in case flush left pending I/O */
Paolo Bonzinid7d512f2012-08-23 11:20:36 +02001841 notifier_list_notify(&bs->close_notifiers, bs);
Kevin Wolf7094f122012-04-11 11:06:37 +02001842
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001843 if (bs->drv) {
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001844 if (bs->backing_hd) {
Fam Zheng826b6ca2014-05-23 21:29:47 +08001845 BlockDriverState *backing_hd = bs->backing_hd;
1846 bdrv_set_backing_hd(bs, NULL);
1847 bdrv_unref(backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001848 }
bellardea2384d2004-08-01 21:59:26 +00001849 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -05001850 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +00001851 bs->opaque = NULL;
1852 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001853 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +02001854 bs->backing_file[0] = '\0';
1855 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +02001856 bs->total_sectors = 0;
1857 bs->encrypted = 0;
1858 bs->valid_key = 0;
1859 bs->sg = 0;
1860 bs->growable = 0;
Asias He0d51b4d2013-08-22 15:24:14 +08001861 bs->zero_beyond_eof = false;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001862 QDECREF(bs->options);
1863 bs->options = NULL;
bellardb3380822004-03-14 21:38:54 +00001864
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001865 if (bs->file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001866 bdrv_unref(bs->file);
Paolo Bonzini0ac93772012-05-08 16:51:44 +02001867 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001868 }
bellardb3380822004-03-14 21:38:54 +00001869 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001870
Pavel Hrdina9ca11152012-08-09 12:44:48 +02001871 bdrv_dev_change_media_cb(bs, false);
1872
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001873 /*throttling disk I/O limits*/
1874 if (bs->io_limits_enabled) {
1875 bdrv_io_limits_disable(bs);
1876 }
bellardb3380822004-03-14 21:38:54 +00001877}
1878
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001879void bdrv_close_all(void)
1880{
1881 BlockDriverState *bs;
1882
Benoît Canetdc364f42014-01-23 21:31:32 +01001883 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001884 AioContext *aio_context = bdrv_get_aio_context(bs);
1885
1886 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001887 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001888 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001889 }
1890}
1891
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001892/* Check if any requests are in-flight (including throttled requests) */
1893static bool bdrv_requests_pending(BlockDriverState *bs)
1894{
1895 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1896 return true;
1897 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001898 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1899 return true;
1900 }
1901 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001902 return true;
1903 }
1904 if (bs->file && bdrv_requests_pending(bs->file)) {
1905 return true;
1906 }
1907 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1908 return true;
1909 }
1910 return false;
1911}
1912
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001913/*
1914 * Wait for pending requests to complete across all BlockDriverStates
1915 *
1916 * This function does not flush data to disk, use bdrv_flush_all() for that
1917 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001918 *
1919 * Note that completion of an asynchronous I/O operation can trigger any
1920 * number of other I/O operations on other devices---for example a coroutine
1921 * can be arbitrarily complex and a constant flow of I/O can come until the
1922 * coroutine is complete. Because of this, it is not possible to have a
1923 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001924 */
1925void bdrv_drain_all(void)
1926{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001927 /* Always run first iteration so any pending completion BHs run */
1928 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001929 BlockDriverState *bs;
1930
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001931 while (busy) {
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001932 busy = false;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001933
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02001934 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1935 AioContext *aio_context = bdrv_get_aio_context(bs);
1936 bool bs_busy;
1937
1938 aio_context_acquire(aio_context);
1939 bdrv_start_throttled_reqs(bs);
1940 bs_busy = bdrv_requests_pending(bs);
1941 bs_busy |= aio_poll(aio_context, bs_busy);
1942 aio_context_release(aio_context);
1943
1944 busy |= bs_busy;
1945 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001946 }
1947}
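
/*
 * The usual pattern (sketch) is therefore:
 *
 *     bdrv_drain_all();    quiesce all in-flight requests first
 *     bdrv_flush_all();    then flush all images to stable storage
 */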
1948
Benoît Canetdc364f42014-01-23 21:31:32 +01001949/* make a BlockDriverState anonymous by removing it from the bdrv_states and
1950 * graph_bdrv_states lists.
Ryan Harperd22b2f42011-03-29 20:51:47 -05001951 * Also, NULL-terminate the device_name to prevent a double remove */
1952void bdrv_make_anon(BlockDriverState *bs)
1953{
1954 if (bs->device_name[0] != '\0') {
Benoît Canetdc364f42014-01-23 21:31:32 +01001955 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Ryan Harperd22b2f42011-03-29 20:51:47 -05001956 }
1957 bs->device_name[0] = '\0';
Benoît Canetdc364f42014-01-23 21:31:32 +01001958 if (bs->node_name[0] != '\0') {
1959 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1960 }
1961 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05001962}
1963
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001964static void bdrv_rebind(BlockDriverState *bs)
1965{
1966 if (bs->drv && bs->drv->bdrv_rebind) {
1967 bs->drv->bdrv_rebind(bs);
1968 }
1969}
1970
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001971static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1972 BlockDriverState *bs_src)
1973{
1974 /* move some fields that need to stay attached to the device */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001975
1976 /* dev info */
1977 bs_dest->dev_ops = bs_src->dev_ops;
1978 bs_dest->dev_opaque = bs_src->dev_opaque;
1979 bs_dest->dev = bs_src->dev;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001980 bs_dest->guest_block_size = bs_src->guest_block_size;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001981 bs_dest->copy_on_read = bs_src->copy_on_read;
1982
1983 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1984
Benoît Canetcc0681c2013-09-02 14:14:39 +02001985 /* i/o throttled req */
1986 memcpy(&bs_dest->throttle_state,
1987 &bs_src->throttle_state,
1988 sizeof(ThrottleState));
1989 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1990 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001991 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1992
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001993 /* r/w error */
1994 bs_dest->on_read_error = bs_src->on_read_error;
1995 bs_dest->on_write_error = bs_src->on_write_error;
1996
1997 /* i/o status */
1998 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1999 bs_dest->iostatus = bs_src->iostatus;
2000
2001 /* dirty bitmap */
Fam Zhenge4654d22013-11-13 18:29:43 +08002002 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002003
Fam Zheng9fcb0252013-08-23 09:14:46 +08002004 /* reference count */
2005 bs_dest->refcnt = bs_src->refcnt;
2006
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002007 /* job */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002008 bs_dest->job = bs_src->job;
2009
2010 /* keep the same entry in bdrv_states */
2011 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
2012 bs_src->device_name);
Benoît Canetdc364f42014-01-23 21:31:32 +01002013 bs_dest->device_list = bs_src->device_list;
Fam Zhengfbe40ff2014-05-23 21:29:42 +08002014 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2015 sizeof(bs_dest->op_blockers));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002016}
2017
2018/*
2019 * Swap bs contents for two image chains while they are live,
2020 * while keeping required fields on the BlockDriverState that is
2021 * actually attached to a device.
2022 *
2023 * This will modify the BlockDriverState fields, and swap contents
2024 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2025 *
2026 * bs_new is required to be anonymous.
2027 *
2028 * This function does not create any image files.
2029 */
2030void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2031{
2032 BlockDriverState tmp;
2033
Benoît Canet90ce8a02014-03-05 23:48:29 +01002034 /* The code needs to swap the node_name but simply swapping node_list won't
2035 * work so first remove the nodes from the graph list, do the swap then
2036 * insert them back if needed.
2037 */
2038 if (bs_new->node_name[0] != '\0') {
2039 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2040 }
2041 if (bs_old->node_name[0] != '\0') {
2042 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2043 }
2044
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002045 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
2046 assert(bs_new->device_name[0] == '\0');
Fam Zhenge4654d22013-11-13 18:29:43 +08002047 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002048 assert(bs_new->job == NULL);
2049 assert(bs_new->dev == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002050 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002051 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002052
2053 tmp = *bs_new;
2054 *bs_new = *bs_old;
2055 *bs_old = tmp;
2056
2057 /* there are some fields that should not be swapped, move them back */
2058 bdrv_move_feature_fields(&tmp, bs_old);
2059 bdrv_move_feature_fields(bs_old, bs_new);
2060 bdrv_move_feature_fields(bs_new, &tmp);
2061
2062 /* bs_new shouldn't be in bdrv_states even after the swap! */
2063 assert(bs_new->device_name[0] == '\0');
2064
2065 /* Check a few fields that should remain attached to the device */
2066 assert(bs_new->dev == NULL);
2067 assert(bs_new->job == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002068 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002069 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002070
Benoît Canet90ce8a02014-03-05 23:48:29 +01002071 /* insert the nodes back into the graph node list if needed */
2072 if (bs_new->node_name[0] != '\0') {
2073 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2074 }
2075 if (bs_old->node_name[0] != '\0') {
2076 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2077 }
2078
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002079 bdrv_rebind(bs_new);
2080 bdrv_rebind(bs_old);
2081}
2082
Jeff Cody8802d1f2012-02-28 15:54:06 -05002083/*
2084 * Add new bs contents at the top of an image chain while the chain is
2085 * live, while keeping required fields on the top layer.
2086 *
2087 * This will modify the BlockDriverState fields, and swap contents
2088 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2089 *
Jeff Codyf6801b82012-03-27 16:30:19 -04002090 * bs_new is required to be anonymous.
2091 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05002092 * This function does not create any image files.
2093 */
2094void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2095{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002096 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002097
2098 /* The contents of 'tmp' will become bs_top, as we are
2099 * swapping bs_new and bs_top contents. */
Fam Zheng8d24cce2014-05-23 21:29:45 +08002100 bdrv_set_backing_hd(bs_top, bs_new);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002101}
2102
Fam Zheng4f6fd342013-08-23 09:14:47 +08002103static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00002104{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002105 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02002106 assert(!bs->job);
Fam Zheng3718d8a2014-05-23 21:29:43 +08002107 assert(bdrv_op_blocker_is_empty(bs));
Fam Zheng4f6fd342013-08-23 09:14:47 +08002108 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08002109 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02002110
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02002111 bdrv_close(bs);
2112
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002113 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05002114 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00002115
Anthony Liguori7267c092011-08-20 22:09:37 -05002116 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00002117}
2118
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002119int bdrv_attach_dev(BlockDriverState *bs, void *dev)
2120/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02002121{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002122 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02002123 return -EBUSY;
2124 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002125 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03002126 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +02002127 return 0;
2128}
2129
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002130/* TODO qdevified devices don't use this, remove when devices are qdevified */
2131void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02002132{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002133 if (bdrv_attach_dev(bs, dev) < 0) {
2134 abort();
2135 }
2136}
2137
2138void bdrv_detach_dev(BlockDriverState *bs, void *dev)
2139/* TODO change to DeviceState *dev when all users are qdevified */
2140{
2141 assert(bs->dev == dev);
2142 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02002143 bs->dev_ops = NULL;
2144 bs->dev_opaque = NULL;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01002145 bs->guest_block_size = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02002146}
2147
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002148/* TODO change to return DeviceState * when all users are qdevified */
2149void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02002150{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002151 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02002152}
2153
Markus Armbruster0e49de52011-08-03 15:07:41 +02002154void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2155 void *opaque)
2156{
2157 bs->dev_ops = ops;
2158 bs->dev_opaque = opaque;
2159}
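
/*
 * Device models register their callbacks with a statically allocated
 * BlockDevOps table. Hypothetical sketch (the state struct, callback and
 * ops table are made up for illustration):
 *
 *     typedef struct FooState {
 *         bool media_changed;
 *     } FooState;
 *
 *     static void foo_change_media_cb(void *opaque, bool load)
 *     {
 *         FooState *s = opaque;
 *         s->media_changed = true;
 *     }
 *
 *     static const BlockDevOps foo_block_ops = {
 *         .change_media_cb = foo_change_media_cb,
 *     };
 *
 *     bdrv_set_dev_ops(bs, &foo_block_ops, s);
 */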
2160
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002161static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02002162{
Markus Armbruster145feb12011-08-03 15:07:42 +02002163 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002164 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002165 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002166 if (tray_was_closed) {
2167 /* tray open */
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02002168 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2169 true, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002170 }
2171 if (load) {
2172 /* tray close */
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02002173 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2174 false, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002175 }
Markus Armbruster145feb12011-08-03 15:07:42 +02002176 }
2177}
2178
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002179bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2180{
2181 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2182}
2183
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01002184void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2185{
2186 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2187 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2188 }
2189}
2190
Markus Armbrustere4def802011-09-06 18:58:53 +02002191bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2192{
2193 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2194 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2195 }
2196 return false;
2197}
2198
Markus Armbruster145feb12011-08-03 15:07:42 +02002199static void bdrv_dev_resize_cb(BlockDriverState *bs)
2200{
2201 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2202 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02002203 }
2204}
2205
Markus Armbrusterf1076392011-09-06 18:58:46 +02002206bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2207{
2208 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2209 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2210 }
2211 return false;
2212}
2213
aliguorie97fc192009-04-21 23:11:50 +00002214/*
2215 * Run consistency checks on an image
2216 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002217 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002218 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002219 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002220 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002221int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002222{
2223 if (bs->drv->bdrv_check == NULL) {
2224 return -ENOTSUP;
2225 }
2226
Kevin Wolfe076f332010-06-29 11:43:13 +02002227 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002228 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002229}
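
/*
 * Example (sketch; loosely mirrors how an image-checking tool consumes the
 * result, with placeholder error reporting):
 *
 *     BdrvCheckResult result;
 *     int ret = bdrv_check(bs, &result, BDRV_FIX_ERRORS);
 *
 *     if (ret < 0) {
 *         error_report("Could not check the image: %s", strerror(-ret));
 *     } else if (result.corruptions || result.check_errors) {
 *         error_report("Image is corrupted");
 *     }
 */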
2230
Kevin Wolf8a426612010-07-16 17:17:01 +02002231#define COMMIT_BUF_SECTORS 2048
2232
bellard33e39632003-07-06 17:15:21 +00002233/* commit the COW overlay into its backing image */
2234int bdrv_commit(BlockDriverState *bs)
2235{
bellard19cb3732006-08-19 11:45:59 +00002236 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002237 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002238 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002239 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002240 uint8_t *buf = NULL;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002241 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00002242
bellard19cb3732006-08-19 11:45:59 +00002243 if (!drv)
2244 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002245
2246 if (!bs->backing_hd) {
2247 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002248 }
2249
Fam Zheng3718d8a2014-05-23 21:29:43 +08002250 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
2251 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002252 return -EBUSY;
2253 }
2254
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002255 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002256 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2257 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002258 open_flags = bs->backing_hd->open_flags;
2259
2260 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002261 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2262 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002263 }
bellard33e39632003-07-06 17:15:21 +00002264 }
bellardea2384d2004-08-01 21:59:26 +00002265
Jeff Cody72706ea2014-01-24 09:02:35 -05002266 length = bdrv_getlength(bs);
2267 if (length < 0) {
2268 ret = length;
2269 goto ro_cleanup;
2270 }
2271
2272 backing_length = bdrv_getlength(bs->backing_hd);
2273 if (backing_length < 0) {
2274 ret = backing_length;
2275 goto ro_cleanup;
2276 }
2277
2278 /* If our top snapshot is larger than the backing file image,
2279 * grow the backing file image if possible. If not possible,
2280 * we must return an error */
2281 if (length > backing_length) {
2282 ret = bdrv_truncate(bs->backing_hd, length);
2283 if (ret < 0) {
2284 goto ro_cleanup;
2285 }
2286 }
2287
2288 total_sectors = length >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05002289 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00002290
Kevin Wolf8a426612010-07-16 17:17:01 +02002291 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002292 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2293 if (ret < 0) {
2294 goto ro_cleanup;
2295 }
2296 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002297 ret = bdrv_read(bs, sector, buf, n);
2298 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002299 goto ro_cleanup;
2300 }
2301
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002302 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2303 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002304 goto ro_cleanup;
2305 }
bellardea2384d2004-08-01 21:59:26 +00002306 }
2307 }
bellard95389c82005-12-18 18:28:15 +00002308
Christoph Hellwig1d449522010-01-17 12:32:30 +01002309 if (drv->bdrv_make_empty) {
2310 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002311 if (ret < 0) {
2312 goto ro_cleanup;
2313 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002314 bdrv_flush(bs);
2315 }
bellard95389c82005-12-18 18:28:15 +00002316
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002317 /*
2318 * Make sure all data we wrote to the backing device is actually
2319 * stable on disk.
2320 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002321 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002322 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002323 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002324
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002325 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002326ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05002327 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002328
2329 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002330 /* ignoring error return here */
2331 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002332 }
2333
Christoph Hellwig1d449522010-01-17 12:32:30 +01002334 return ret;
bellard33e39632003-07-06 17:15:21 +00002335}
2336
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002337int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002338{
2339 BlockDriverState *bs;
2340
Benoît Canetdc364f42014-01-23 21:31:32 +01002341 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002342 AioContext *aio_context = bdrv_get_aio_context(bs);
2343
2344 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002345 if (bs->drv && bs->backing_hd) {
2346 int ret = bdrv_commit(bs);
2347 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002348 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002349 return ret;
2350 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002351 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002352 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002353 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002354 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002355}
2356
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002357/**
2358 * Remove an active request from the tracked requests list
2359 *
2360 * This function should be called when a tracked request is completing.
2361 */
2362static void tracked_request_end(BdrvTrackedRequest *req)
2363{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002364 if (req->serialising) {
2365 req->bs->serialising_in_flight--;
2366 }
2367
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002368 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002369 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002370}
2371
2372/**
2373 * Add an active request to the tracked requests list
2374 */
2375static void tracked_request_begin(BdrvTrackedRequest *req,
2376 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002377 int64_t offset,
2378 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002379{
2380 *req = (BdrvTrackedRequest){
2381 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002382 .offset = offset,
2383 .bytes = bytes,
2384 .is_write = is_write,
2385 .co = qemu_coroutine_self(),
2386 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002387 .overlap_offset = offset,
2388 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002389 };
2390
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002391 qemu_co_queue_init(&req->wait_queue);
2392
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002393 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2394}
2395
Kevin Wolfe96126f2014-02-08 10:42:18 +01002396static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002397{
Kevin Wolf73271452013-12-04 17:08:50 +01002398 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002399 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2400 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002401
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002402 if (!req->serialising) {
2403 req->bs->serialising_in_flight++;
2404 req->serialising = true;
2405 }
Kevin Wolf73271452013-12-04 17:08:50 +01002406
2407 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2408 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002409}
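
/*
 * For example, with align = 4096 (a 4k cluster), a request with
 * offset = 1536 and bytes = 1024 gets
 *
 *     overlap_offset = 1536 & ~4095             = 0
 *     overlap_bytes  = ROUND_UP(2560, 4096) - 0 = 4096
 *
 * i.e. the serialised window is widened to the containing aligned chunk.
 */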
2410
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002411/**
2412 * Round a region to cluster boundaries
2413 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002414void bdrv_round_to_clusters(BlockDriverState *bs,
2415 int64_t sector_num, int nb_sectors,
2416 int64_t *cluster_sector_num,
2417 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002418{
2419 BlockDriverInfo bdi;
2420
2421 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2422 *cluster_sector_num = sector_num;
2423 *cluster_nb_sectors = nb_sectors;
2424 } else {
2425 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2426 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2427 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2428 nb_sectors, c);
2429 }
2430}
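
/*
 * Worked example: with a 64k cluster size (128 sectors), a request for
 * sector_num = 130, nb_sectors = 10 is rounded to
 *
 *     cluster_sector_num = QEMU_ALIGN_DOWN(130, 128)          = 128
 *     cluster_nb_sectors = QEMU_ALIGN_UP(130 - 128 + 10, 128) = 128
 *
 * so callers such as copy-on-read always operate on whole clusters.
 */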
2431
Kevin Wolf73271452013-12-04 17:08:50 +01002432static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002433{
2434 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002435 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002436
Kevin Wolf73271452013-12-04 17:08:50 +01002437 ret = bdrv_get_info(bs, &bdi);
2438 if (ret < 0 || bdi.cluster_size == 0) {
2439 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002440 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002441 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002442 }
2443}
2444
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002445static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002446 int64_t offset, unsigned int bytes)
2447{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002448 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002449 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002450 return false;
2451 }
2452 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002453 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002454 return false;
2455 }
2456 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002457}
2458
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002459static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002460{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002461 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002462 BdrvTrackedRequest *req;
2463 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002464 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002465
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002466 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002467 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002468 }
2469
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002470 do {
2471 retry = false;
2472 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002473 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002474 continue;
2475 }
Kevin Wolf73271452013-12-04 17:08:50 +01002476 if (tracked_request_overlaps(req, self->overlap_offset,
2477 self->overlap_bytes))
2478 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002479 /* Hitting this means there was a reentrant request, for
2480 * example, a block driver issuing nested requests. This must
2481 * never happen since it means deadlock.
2482 */
2483 assert(qemu_coroutine_self() != req->co);
2484
Kevin Wolf64604402013-12-13 13:04:35 +01002485 /* If the request is already (indirectly) waiting for us, or
2486 * will wait for us as soon as it wakes up, then just go on
2487 * (instead of producing a deadlock in the former case). */
2488 if (!req->waiting_for) {
2489 self->waiting_for = req;
2490 qemu_co_queue_wait(&req->wait_queue);
2491 self->waiting_for = NULL;
2492 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002493 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002494 break;
2495 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002496 }
2497 }
2498 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002499
2500 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002501}
2502
Kevin Wolf756e6732010-01-12 12:55:17 +01002503/*
2504 * Return values:
2505 * 0 - success
2506 * -EINVAL - backing format specified, but no file
2507 * -ENOSPC - can't update the backing file because no space is left in the
2508 * image file header
2509 * -ENOTSUP - format driver doesn't support changing the backing file
2510 */
2511int bdrv_change_backing_file(BlockDriverState *bs,
2512 const char *backing_file, const char *backing_fmt)
2513{
2514 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002515 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002516
Paolo Bonzini5f377792012-04-12 14:01:01 +02002517 /* Backing file format doesn't make sense without a backing file */
2518 if (backing_fmt && !backing_file) {
2519 return -EINVAL;
2520 }
2521
Kevin Wolf756e6732010-01-12 12:55:17 +01002522 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002523 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002524 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002525 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002526 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002527
2528 if (ret == 0) {
2529 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2530 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2531 }
2532 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002533}
2534
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002535/*
2536 * Finds the image layer in the chain that has 'bs' as its backing file.
2537 *
2538 * active is the current topmost image.
2539 *
2540 * Returns NULL if bs is not found in active's image chain,
2541 * or if active == bs.
2542 */
2543BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2544 BlockDriverState *bs)
2545{
2546 BlockDriverState *overlay = NULL;
2547 BlockDriverState *intermediate;
2548
2549 assert(active != NULL);
2550 assert(bs != NULL);
2551
2552 /* if bs is the same as active, then by definition it has no overlay
2553 */
2554 if (active == bs) {
2555 return NULL;
2556 }
2557
2558 intermediate = active;
2559 while (intermediate->backing_hd) {
2560 if (intermediate->backing_hd == bs) {
2561 overlay = intermediate;
2562 break;
2563 }
2564 intermediate = intermediate->backing_hd;
2565 }
2566
2567 return overlay;
2568}
2569
2570typedef struct BlkIntermediateStates {
2571 BlockDriverState *bs;
2572 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2573} BlkIntermediateStates;
2574
2575
2576/*
2577 * Drops images above 'base' up to and including 'top', and sets the image
2578 * above 'top' to have base as its backing file.
2579 *
2580 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2581 * information in 'bs' can be properly updated.
2582 *
2583 * E.g., this will convert the following chain:
2584 * bottom <- base <- intermediate <- top <- active
2585 *
2586 * to
2587 *
2588 * bottom <- base <- active
2589 *
2590 * It is allowed for bottom==base, in which case it converts:
2591 *
2592 * base <- intermediate <- top <- active
2593 *
2594 * to
2595 *
2596 * base <- active
2597 *
2598 * Error conditions:
2599 * if active == top, that is considered an error
2600 *
2601 */
2602int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2603 BlockDriverState *base)
2604{
2605 BlockDriverState *intermediate;
2606 BlockDriverState *base_bs = NULL;
2607 BlockDriverState *new_top_bs = NULL;
2608 BlkIntermediateStates *intermediate_state, *next;
2609 int ret = -EIO;
2610
2611 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2612 QSIMPLEQ_INIT(&states_to_delete);
2613
2614 if (!top->drv || !base->drv) {
2615 goto exit;
2616 }
2617
2618 new_top_bs = bdrv_find_overlay(active, top);
2619
2620 if (new_top_bs == NULL) {
2621 /* we could not find the image above 'top', this is an error */
2622 goto exit;
2623 }
2624
2625 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2626 * to do, no intermediate images */
2627 if (new_top_bs->backing_hd == base) {
2628 ret = 0;
2629 goto exit;
2630 }
2631
2632 intermediate = top;
2633
2634 /* now we will go down through the list, and add each BDS we find
2635 * into our deletion queue, until we hit the 'base'
2636 */
2637 while (intermediate) {
2638 intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
2639 intermediate_state->bs = intermediate;
2640 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2641
2642 if (intermediate->backing_hd == base) {
2643 base_bs = intermediate->backing_hd;
2644 break;
2645 }
2646 intermediate = intermediate->backing_hd;
2647 }
2648 if (base_bs == NULL) {
2649 /* something went wrong, we did not end at the base. safely
2650 * unravel everything, and exit with error */
2651 goto exit;
2652 }
2653
2654 /* success - we can delete the intermediate states, and link top->base */
2655 ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
2656 base_bs->drv ? base_bs->drv->format_name : "");
2657 if (ret) {
2658 goto exit;
2659 }
Fam Zheng920beae2014-05-23 21:29:46 +08002660 bdrv_set_backing_hd(new_top_bs, base_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002661
2662 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2663 /* so that bdrv_close() does not recursively close the chain */
Fam Zheng920beae2014-05-23 21:29:46 +08002664 bdrv_set_backing_hd(intermediate_state->bs, NULL);
Fam Zheng4f6fd342013-08-23 09:14:47 +08002665 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002666 }
2667 ret = 0;
2668
2669exit:
2670 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2671 g_free(intermediate_state);
2672 }
2673 return ret;
2674}
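/* Illustrative sketch (editorial addition, not part of block.c): collapsing a
 * backing chain "base <- top <- active" down to "base <- active" with
 * bdrv_drop_intermediate(), whose contract is documented above. The wrapper
 * name is hypothetical and error handling is reduced to a return-code check. */
static int example_drop_intermediate(BlockDriverState *active,
                                     BlockDriverState *top,
                                     BlockDriverState *base)
{
    int ret = bdrv_drop_intermediate(active, top, base);

    if (ret < 0) {
        /* active == top, a missing driver, or 'base' not below 'top'. */
        return ret;
    }
    /* The overlay of 'top' now points directly at 'base'. */
    return 0;
}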
2675
2676
aliguori71d07702009-03-03 17:37:16 +00002677static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2678 size_t size)
2679{
2680 int64_t len;
2681
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002682 if (size > INT_MAX) {
2683 return -EIO;
2684 }
2685
aliguori71d07702009-03-03 17:37:16 +00002686 if (!bdrv_is_inserted(bs))
2687 return -ENOMEDIUM;
2688
2689 if (bs->growable)
2690 return 0;
2691
2692 len = bdrv_getlength(bs);
2693
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002694 if (offset < 0)
2695 return -EIO;
2696
2697 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00002698 return -EIO;
2699
2700 return 0;
2701}
2702
2703static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2704 int nb_sectors)
2705{
Kevin Wolf54db38a2014-04-14 14:47:14 +02002706 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002707 return -EIO;
2708 }
2709
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002710 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2711 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002712}
2713
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002714typedef struct RwCo {
2715 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002716 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002717 QEMUIOVector *qiov;
2718 bool is_write;
2719 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002720 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002721} RwCo;
2722
2723static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2724{
2725 RwCo *rwco = opaque;
2726
2727 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002728 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2729 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002730 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002731 } else {
2732 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2733 rwco->qiov->size, rwco->qiov,
2734 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002735 }
2736}
2737
2738/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002739 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002740 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002741static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2742 QEMUIOVector *qiov, bool is_write,
2743 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002744{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002745 Coroutine *co;
2746 RwCo rwco = {
2747 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002748 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002749 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002750 .is_write = is_write,
2751 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002752 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002753 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002754
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002755 /**
2756 * In a synchronous call context, the throttling timer will not fire while
2757 * the vcpu is blocked, so I/O throttling has to be disabled here
2758 * if it has been enabled.
2759 */
2760 if (bs->io_limits_enabled) {
2761 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2762 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2763 bdrv_io_limits_disable(bs);
2764 }
2765
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002766 if (qemu_in_coroutine()) {
2767 /* Fast-path if already in coroutine context */
2768 bdrv_rw_co_entry(&rwco);
2769 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002770 AioContext *aio_context = bdrv_get_aio_context(bs);
2771
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002772 co = qemu_coroutine_create(bdrv_rw_co_entry);
2773 qemu_coroutine_enter(co, &rwco);
2774 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002775 aio_poll(aio_context, true);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002776 }
2777 }
2778 return rwco.ret;
2779}
2780
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002781/*
2782 * Process a synchronous request using coroutines
2783 */
2784static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002785 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002786{
2787 QEMUIOVector qiov;
2788 struct iovec iov = {
2789 .iov_base = (void *)buf,
2790 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2791 };
2792
Kevin Wolfda15ee52014-04-14 15:39:36 +02002793 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2794 return -EINVAL;
2795 }
2796
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002797 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002798 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2799 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002800}
2801
bellard19cb3732006-08-19 11:45:59 +00002802/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002803int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002804 uint8_t *buf, int nb_sectors)
2805{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002806 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002807}
2808
Markus Armbruster07d27a42012-06-29 17:34:29 +02002809/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2810int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2811 uint8_t *buf, int nb_sectors)
2812{
2813 bool enabled;
2814 int ret;
2815
2816 enabled = bs->io_limits_enabled;
2817 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002818 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002819 bs->io_limits_enabled = enabled;
2820 return ret;
2821}
2822
ths5fafdf22007-09-16 21:08:06 +00002823/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002824 -EIO generic I/O error (may happen for all errors)
2825 -ENOMEDIUM No media inserted.
2826 -EINVAL Invalid sector number or nb_sectors
2827 -EACCES Trying to write a read-only device
2828*/
ths5fafdf22007-09-16 21:08:06 +00002829int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002830 const uint8_t *buf, int nb_sectors)
2831{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002832 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002833}
2834
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002835int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2836 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002837{
2838 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002839 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002840}
2841
Peter Lievend75cbb52013-10-24 12:07:03 +02002842/*
2843 * Completely zero out a block device with the help of bdrv_write_zeroes.
2844 * The operation is sped up by checking the block status and only writing
2845 * zeroes to the device if they currently do not return zeroes. Optional
2846 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2847 *
2848 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2849 */
2850int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2851{
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002852 int64_t target_size;
Peter Lievend75cbb52013-10-24 12:07:03 +02002853 int64_t ret, nb_sectors, sector_num = 0;
2854 int n;
2855
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002856 target_size = bdrv_getlength(bs);
2857 if (target_size < 0) {
2858 return target_size;
2859 }
2860 target_size /= BDRV_SECTOR_SIZE;
2861
Peter Lievend75cbb52013-10-24 12:07:03 +02002862 for (;;) {
2863 nb_sectors = target_size - sector_num;
2864 if (nb_sectors <= 0) {
2865 return 0;
2866 }
2867 if (nb_sectors > INT_MAX) {
2868 nb_sectors = INT_MAX;
2869 }
2870 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002871 if (ret < 0) {
2872 error_report("error getting block status at sector %" PRId64 ": %s",
2873 sector_num, strerror(-ret));
2874 return ret;
2875 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002876 if (ret & BDRV_BLOCK_ZERO) {
2877 sector_num += n;
2878 continue;
2879 }
2880 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2881 if (ret < 0) {
2882 error_report("error writing zeroes at sector %" PRId64 ": %s",
2883 sector_num, strerror(-ret));
2884 return ret;
2885 }
2886 sector_num += n;
2887 }
2888}
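/* Illustrative sketch (editorial addition, not part of block.c): zeroing a
 * whole device with bdrv_make_zero() as documented above, additionally
 * allowing zeroed ranges to be unmapped. Assumes a valid, writable bs. */
static int example_zero_device(BlockDriverState *bs)
{
    int ret = bdrv_make_zero(bs, BDRV_REQ_MAY_UNMAP);

    if (ret < 0) {
        /* Error codes follow bdrv_write(): -EIO, -ENOMEDIUM, ... */
        return ret;
    }
    return 0;
}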
2889
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002890int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002891{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002892 QEMUIOVector qiov;
2893 struct iovec iov = {
2894 .iov_base = (void *)buf,
2895 .iov_len = bytes,
2896 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002897 int ret;
bellard83f64092006-08-01 16:21:11 +00002898
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002899 if (bytes < 0) {
2900 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002901 }
2902
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002903 qemu_iovec_init_external(&qiov, &iov, 1);
2904 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2905 if (ret < 0) {
2906 return ret;
bellard83f64092006-08-01 16:21:11 +00002907 }
2908
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002909 return bytes;
bellard83f64092006-08-01 16:21:11 +00002910}
2911
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002912int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002913{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002914 int ret;
bellard83f64092006-08-01 16:21:11 +00002915
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002916 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2917 if (ret < 0) {
2918 return ret;
bellard83f64092006-08-01 16:21:11 +00002919 }
2920
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002921 return qiov->size;
2922}
2923
2924int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002925 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002926{
2927 QEMUIOVector qiov;
2928 struct iovec iov = {
2929 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002930 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002931 };
2932
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002933 if (bytes < 0) {
2934 return -EINVAL;
2935 }
2936
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002937 qemu_iovec_init_external(&qiov, &iov, 1);
2938 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002939}
bellard83f64092006-08-01 16:21:11 +00002940
Kevin Wolff08145f2010-06-16 16:38:15 +02002941/*
2942 * Writes to the file and ensures that no writes are reordered across this
2943 * request (acts as a barrier)
2944 *
2945 * Returns 0 on success, -errno in error cases.
2946 */
2947int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2948 const void *buf, int count)
2949{
2950 int ret;
2951
2952 ret = bdrv_pwrite(bs, offset, buf, count);
2953 if (ret < 0) {
2954 return ret;
2955 }
2956
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002957 /* No flush needed for cache modes that already do it */
2958 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002959 bdrv_flush(bs);
2960 }
2961
2962 return 0;
2963}
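/* Illustrative sketch (editorial addition, not part of block.c): updating an
 * on-disk header with bdrv_pwrite_sync() so that the write acts as a barrier
 * against later writes, as described above. The header field and offset are
 * invented for the example. */
static int example_update_header(BlockDriverState *bs)
{
    uint32_t magic = 0xcafe0001;   /* hypothetical header field */

    /* Unlike plain bdrv_pwrite(), this also flushes for cache modes that
     * need it, so no later write can overtake the header update. */
    return bdrv_pwrite_sync(bs, 0, &magic, sizeof(magic));
}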
2964
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002965static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002966 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2967{
2968 /* Perform I/O through a temporary buffer so that users who scribble over
2969 * their read buffer while the operation is in progress do not end up
2970 * modifying the image file. This is critical for zero-copy guest I/O
2971 * where anything might happen inside guest memory.
2972 */
2973 void *bounce_buffer;
2974
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002975 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002976 struct iovec iov;
2977 QEMUIOVector bounce_qiov;
2978 int64_t cluster_sector_num;
2979 int cluster_nb_sectors;
2980 size_t skip_bytes;
2981 int ret;
2982
2983 /* Cover the entire cluster so that no additional backing file I/O is
2984 * required when allocating a cluster in the image file.
2985 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002986 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2987 &cluster_sector_num, &cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002988
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002989 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2990 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002991
2992 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2993 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2994 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
2995
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002996 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2997 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002998 if (ret < 0) {
2999 goto err;
3000 }
3001
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003002 if (drv->bdrv_co_write_zeroes &&
3003 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01003004 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003005 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003006 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003007 /* This does not change the data on the disk, so it is not necessary
3008 * to flush even in cache=writethrough mode.
3009 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003010 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00003011 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003012 }
3013
Stefan Hajnocziab185922011-11-17 13:40:31 +00003014 if (ret < 0) {
3015 /* It might be okay to ignore write errors for guest requests. If this
3016 * is a deliberate copy-on-read then we don't want to ignore the error.
3017 * Simply report it in all cases.
3018 */
3019 goto err;
3020 }
3021
3022 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04003023 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3024 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003025
3026err:
3027 qemu_vfree(bounce_buffer);
3028 return ret;
3029}
3030
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003031/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003032 * Forwards an already correctly aligned request to the BlockDriver. This
3033 * handles copy on read and zeroing after EOF; any other features must be
3034 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003035 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003036static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003037 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01003038 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02003039{
3040 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003041 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003042
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003043 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3044 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003045
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003046 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3047 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
3048
3049 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003050 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01003051 /* If we touch the same cluster it counts as an overlap. This
3052 * guarantees that allocating writes will be serialized and not race
3053 * with each other for the same cluster. For example, in copy-on-read
3054 * it ensures that the CoR read and write operations are atomic and
3055 * guest writes cannot interleave between them. */
3056 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003057 }
3058
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01003059 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00003060
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003061 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00003062 int pnum;
3063
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003064 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003065 if (ret < 0) {
3066 goto out;
3067 }
3068
3069 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003070 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003071 goto out;
3072 }
3073 }
3074
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003075 /* Forward the request to the BlockDriver */
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003076 if (!(bs->zero_beyond_eof && bs->growable)) {
3077 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3078 } else {
3079 /* Read zeros after EOF of growable BDSes */
3080 int64_t len, total_sectors, max_nb_sectors;
3081
3082 len = bdrv_getlength(bs);
3083 if (len < 0) {
3084 ret = len;
3085 goto out;
3086 }
3087
Fam Zhengd055a1f2013-09-26 19:55:33 +08003088 total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01003089 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3090 align >> BDRV_SECTOR_BITS);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003091 if (max_nb_sectors > 0) {
3092 ret = drv->bdrv_co_readv(bs, sector_num,
3093 MIN(nb_sectors, max_nb_sectors), qiov);
3094 } else {
3095 ret = 0;
3096 }
3097
3098 /* Reading beyond end of file is supposed to produce zeroes */
3099 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3100 uint64_t offset = MAX(0, total_sectors - sector_num);
3101 uint64_t bytes = (sector_num + nb_sectors - offset) *
3102 BDRV_SECTOR_SIZE;
3103 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3104 }
3105 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00003106
3107out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003108 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003109}
3110
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003111/*
3112 * Handle a read request in coroutine context
3113 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003114static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
3115 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003116 BdrvRequestFlags flags)
3117{
3118 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01003119 BdrvTrackedRequest req;
3120
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003121 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3122 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3123 uint8_t *head_buf = NULL;
3124 uint8_t *tail_buf = NULL;
3125 QEMUIOVector local_qiov;
3126 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003127 int ret;
3128
3129 if (!drv) {
3130 return -ENOMEDIUM;
3131 }
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003132 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003133 return -EIO;
3134 }
3135
3136 if (bs->copy_on_read) {
3137 flags |= BDRV_REQ_COPY_ON_READ;
3138 }
3139
3140 /* throttling disk I/O */
3141 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003142 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003143 }
3144
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003145 /* Align read if necessary by padding qiov */
3146 if (offset & (align - 1)) {
3147 head_buf = qemu_blockalign(bs, align);
3148 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3149 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3150 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3151 use_local_qiov = true;
3152
3153 bytes += offset & (align - 1);
3154 offset = offset & ~(align - 1);
3155 }
3156
3157 if ((offset + bytes) & (align - 1)) {
3158 if (!use_local_qiov) {
3159 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3160 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3161 use_local_qiov = true;
3162 }
3163 tail_buf = qemu_blockalign(bs, align);
3164 qemu_iovec_add(&local_qiov, tail_buf,
3165 align - ((offset + bytes) & (align - 1)));
3166
3167 bytes = ROUND_UP(bytes, align);
3168 }
3169
Kevin Wolf65afd212013-12-03 14:55:55 +01003170 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003171 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003172 use_local_qiov ? &local_qiov : qiov,
3173 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003174 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003175
3176 if (use_local_qiov) {
3177 qemu_iovec_destroy(&local_qiov);
3178 qemu_vfree(head_buf);
3179 qemu_vfree(tail_buf);
3180 }
3181
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003182 return ret;
3183}
3184
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003185static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3186 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3187 BdrvRequestFlags flags)
3188{
3189 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3190 return -EINVAL;
3191 }
3192
3193 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3194 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3195}
3196
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003197int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003198 int nb_sectors, QEMUIOVector *qiov)
3199{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003200 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003201
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003202 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3203}
3204
3205int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3206 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3207{
3208 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3209
3210 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3211 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003212}
3213
Peter Lievenc31cb702013-10-24 12:06:58 +02003214/* If no limit is specified in the BlockLimits, use a default
3215 * of 32768 512-byte sectors (16 MiB) per request.
3216 */
3217#define MAX_WRITE_ZEROES_DEFAULT 32768
3218
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003219static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003220 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003221{
3222 BlockDriver *drv = bs->drv;
3223 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003224 struct iovec iov = {0};
3225 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003226
Peter Lievenc31cb702013-10-24 12:06:58 +02003227 int max_write_zeroes = bs->bl.max_write_zeroes ?
3228 bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
Kevin Wolf621f0582012-03-20 15:12:58 +01003229
Peter Lievenc31cb702013-10-24 12:06:58 +02003230 while (nb_sectors > 0 && !ret) {
3231 int num = nb_sectors;
3232
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003233 /* Align request. Block drivers can expect the "bulk" of the request
3234 * to be aligned.
3235 */
3236 if (bs->bl.write_zeroes_alignment
3237 && num > bs->bl.write_zeroes_alignment) {
3238 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3239 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003240 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003241 num -= sector_num % bs->bl.write_zeroes_alignment;
3242 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3243 /* Shorten the request to the last aligned sector. num cannot
3244 * underflow because num > bs->bl.write_zeroes_alignment.
3245 */
3246 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003247 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003248 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003249
3250 /* limit request size */
3251 if (num > max_write_zeroes) {
3252 num = max_write_zeroes;
3253 }
3254
3255 ret = -ENOTSUP;
3256 /* First try the efficient write zeroes operation */
3257 if (drv->bdrv_co_write_zeroes) {
3258 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3259 }
3260
3261 if (ret == -ENOTSUP) {
3262 /* Fall back to bounce buffer if write zeroes is unsupported */
3263 iov.iov_len = num * BDRV_SECTOR_SIZE;
3264 if (iov.iov_base == NULL) {
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003265 iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
3266 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003267 }
3268 qemu_iovec_init_external(&qiov, &iov, 1);
3269
3270 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003271
3272 /* Keep the bounce buffer around if it is big enough for all
3273 * future requests.
3274 */
3275 if (num < max_write_zeroes) {
3276 qemu_vfree(iov.iov_base);
3277 iov.iov_base = NULL;
3278 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003279 }
3280
3281 sector_num += num;
3282 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003283 }
3284
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003285 qemu_vfree(iov.iov_base);
3286 return ret;
3287}
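/* Worked example (editorial addition, not part of block.c): how the loop in
 * bdrv_co_do_write_zeroes() above splits an unaligned request. Assume
 * bs->bl.write_zeroes_alignment == 8, a large max_write_zeroes, and a request
 * with sector_num == 5, nb_sectors == 30:
 *
 *   pass 1: sector_num % 8 == 5, so num = 8 - 5 = 3   -> zeroes sectors [5, 8)
 *   pass 2: (8 + 27) % 8 == 3, so num = 27 - 3 = 24   -> zeroes sectors [8, 32)
 *   pass 3: num == 3 <= alignment, no adjustment      -> zeroes sectors [32, 35)
 *
 * Only the middle pass is guaranteed to be aligned; drivers therefore see the
 * "bulk" of the request aligned, as the comment in the loop promises. */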
3288
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003289/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003290 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003291 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003292static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003293 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3294 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003295{
3296 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003297 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003298 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003299
Kevin Wolfb404f722013-12-03 14:02:23 +01003300 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3301 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003302
Kevin Wolfb404f722013-12-03 14:02:23 +01003303 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3304 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003305
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003306 waited = wait_serialising_requests(req);
3307 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003308 assert(req->overlap_offset <= offset);
3309 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003310
Kevin Wolf65afd212013-12-03 14:55:55 +01003311 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003312
Peter Lieven465bee12014-05-18 00:58:19 +02003313 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3314 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3315 qemu_iovec_is_zero(qiov)) {
3316 flags |= BDRV_REQ_ZERO_WRITE;
3317 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3318 flags |= BDRV_REQ_MAY_UNMAP;
3319 }
3320 }
3321
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003322 if (ret < 0) {
3323 /* Do nothing, write notifier decided to fail this request */
3324 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003325 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003326 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003327 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003328 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003329 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3330 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003331 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003332
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003333 if (ret == 0 && !bs->enable_write_cache) {
3334 ret = bdrv_co_flush(bs);
3335 }
3336
Fam Zhenge4654d22013-11-13 18:29:43 +08003337 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003338
3339 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
3340 bs->wr_highest_sector = sector_num + nb_sectors - 1;
3341 }
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003342 if (bs->growable && ret >= 0) {
3343 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3344 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003345
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003346 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003347}
3348
Kevin Wolfb404f722013-12-03 14:02:23 +01003349/*
3350 * Handle a write request in coroutine context
3351 */
Kevin Wolf66015532013-12-03 14:40:18 +01003352static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3353 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003354 BdrvRequestFlags flags)
3355{
Kevin Wolf65afd212013-12-03 14:55:55 +01003356 BdrvTrackedRequest req;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003357 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3358 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3359 uint8_t *head_buf = NULL;
3360 uint8_t *tail_buf = NULL;
3361 QEMUIOVector local_qiov;
3362 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003363 int ret;
3364
3365 if (!bs->drv) {
3366 return -ENOMEDIUM;
3367 }
3368 if (bs->read_only) {
3369 return -EACCES;
3370 }
Kevin Wolf66015532013-12-03 14:40:18 +01003371 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfb404f722013-12-03 14:02:23 +01003372 return -EIO;
3373 }
3374
Kevin Wolfb404f722013-12-03 14:02:23 +01003375 /* throttling disk I/O */
3376 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003377 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003378 }
3379
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003380 /*
3381 * Align write if necessary by performing a read-modify-write cycle.
3382 * Pad qiov with the read parts and be sure to have a tracked request not
3383 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3384 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003385 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003386
3387 if (offset & (align - 1)) {
3388 QEMUIOVector head_qiov;
3389 struct iovec head_iov;
3390
3391 mark_request_serialising(&req, align);
3392 wait_serialising_requests(&req);
3393
3394 head_buf = qemu_blockalign(bs, align);
3395 head_iov = (struct iovec) {
3396 .iov_base = head_buf,
3397 .iov_len = align,
3398 };
3399 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3400
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003401 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003402 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3403 align, &head_qiov, 0);
3404 if (ret < 0) {
3405 goto fail;
3406 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003407 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003408
3409 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3410 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3411 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3412 use_local_qiov = true;
3413
3414 bytes += offset & (align - 1);
3415 offset = offset & ~(align - 1);
3416 }
3417
3418 if ((offset + bytes) & (align - 1)) {
3419 QEMUIOVector tail_qiov;
3420 struct iovec tail_iov;
3421 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003422 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003423
3424 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003425 waited = wait_serialising_requests(&req);
3426 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003427
3428 tail_buf = qemu_blockalign(bs, align);
3429 tail_iov = (struct iovec) {
3430 .iov_base = tail_buf,
3431 .iov_len = align,
3432 };
3433 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3434
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003435 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003436 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3437 align, &tail_qiov, 0);
3438 if (ret < 0) {
3439 goto fail;
3440 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003441 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003442
3443 if (!use_local_qiov) {
3444 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3445 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3446 use_local_qiov = true;
3447 }
3448
3449 tail_bytes = (offset + bytes) & (align - 1);
3450 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3451
3452 bytes = ROUND_UP(bytes, align);
3453 }
3454
3455 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3456 use_local_qiov ? &local_qiov : qiov,
3457 flags);
3458
3459fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003460 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003461
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003462 if (use_local_qiov) {
3463 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003464 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003465 qemu_vfree(head_buf);
3466 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003467
Kevin Wolfb404f722013-12-03 14:02:23 +01003468 return ret;
3469}
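/* Worked example (editorial addition, not part of block.c): the read-modify-
 * write padding performed in bdrv_co_do_pwritev() above for a write that is
 * unaligned on both ends. Assume align == 4096, offset == 5000, bytes == 2000:
 *
 *   head: read 4096 bytes at 4096; prepend the first 904 bytes (5000 & 4095)
 *         to the guest qiov; offset becomes 4096, bytes becomes 2904.
 *   tail: (4096 + 2904) & 4095 == 2904, so read 4096 bytes at 4096 again and
 *         append its last 1192 bytes; bytes is rounded up to 4096.
 *
 * The driver then sees a single aligned write covering [4096, 8192), and the
 * serialising marks taken above keep concurrent requests from racing with the
 * padding reads. */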
3470
Kevin Wolf66015532013-12-03 14:40:18 +01003471static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3472 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3473 BdrvRequestFlags flags)
3474{
3475 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3476 return -EINVAL;
3477 }
3478
3479 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3480 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3481}
3482
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003483int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3484 int nb_sectors, QEMUIOVector *qiov)
3485{
3486 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3487
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003488 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3489}
3490
3491int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003492 int64_t sector_num, int nb_sectors,
3493 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003494{
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003495 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003496
Peter Lievend32f35c2013-10-24 12:06:52 +02003497 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3498 flags &= ~BDRV_REQ_MAY_UNMAP;
3499 }
3500
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003501 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003502 BDRV_REQ_ZERO_WRITE | flags);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003503}
3504
bellard83f64092006-08-01 16:21:11 +00003505/**
bellard83f64092006-08-01 16:21:11 +00003506 * Truncate file to 'offset' bytes (needed only for file protocols)
3507 */
3508int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3509{
3510 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003511 int ret;
bellard83f64092006-08-01 16:21:11 +00003512 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003513 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003514 if (!drv->bdrv_truncate)
3515 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003516 if (bs->read_only)
3517 return -EACCES;
Fam Zheng3718d8a2014-05-23 21:29:43 +08003518 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
Marcelo Tosatti85916752011-01-26 12:12:35 -02003519 return -EBUSY;
Fam Zheng3718d8a2014-05-23 21:29:43 +08003520 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003521 ret = drv->bdrv_truncate(bs, offset);
3522 if (ret == 0) {
3523 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02003524 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003525 }
3526 return ret;
bellard83f64092006-08-01 16:21:11 +00003527}
3528
3529/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003530 * Length of an allocated file in bytes. Sparse files are counted by actual
3531 * allocated space. Return < 0 if error or unknown.
3532 */
3533int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3534{
3535 BlockDriver *drv = bs->drv;
3536 if (!drv) {
3537 return -ENOMEDIUM;
3538 }
3539 if (drv->bdrv_get_allocated_file_size) {
3540 return drv->bdrv_get_allocated_file_size(bs);
3541 }
3542 if (bs->file) {
3543 return bdrv_get_allocated_file_size(bs->file);
3544 }
3545 return -ENOTSUP;
3546}
3547
3548/**
bellard83f64092006-08-01 16:21:11 +00003549 * Length of a file in bytes. Return < 0 if error or unknown.
3550 */
3551int64_t bdrv_getlength(BlockDriverState *bs)
3552{
3553 BlockDriver *drv = bs->drv;
3554 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003555 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003556
Kevin Wolfb94a2612013-10-29 12:18:58 +01003557 if (drv->has_variable_length) {
3558 int ret = refresh_total_sectors(bs, bs->total_sectors);
3559 if (ret < 0) {
3560 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003561 }
bellard83f64092006-08-01 16:21:11 +00003562 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003563 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003564}
3565
bellard19cb3732006-08-19 11:45:59 +00003566/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003567void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003568{
bellard19cb3732006-08-19 11:45:59 +00003569 int64_t length;
3570 length = bdrv_getlength(bs);
3571 if (length < 0)
3572 length = 0;
3573 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01003574 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00003575 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00003576}
bellardcf989512004-02-16 21:56:36 +00003577
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003578void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3579 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003580{
3581 bs->on_read_error = on_read_error;
3582 bs->on_write_error = on_write_error;
3583}
3584
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003585BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003586{
3587 return is_read ? bs->on_read_error : bs->on_write_error;
3588}
3589
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003590BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3591{
3592 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3593
3594 switch (on_err) {
3595 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003596 return (error == ENOSPC) ?
3597 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003598 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003599 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003600 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003601 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003602 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003603 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003604 default:
3605 abort();
3606 }
3607}
3608
3609/* This is done by device models because, while the block layer knows
3610 * about the error, it does not know whether an operation comes from
3611 * the device or the block layer (from a job, for example).
3612 */
3613void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3614 bool is_read, int error)
3615{
3616 assert(error >= 0);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003617
Wenchao Xiaa5895692014-06-18 08:43:30 +02003618 if (action == BLOCK_ERROR_ACTION_STOP) {
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003619 /* First set the iostatus, so that "info block" returns an iostatus
3620 * that matches the events raised so far (an additional error iostatus
3621 * is fine, but not a lost one).
3622 */
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003623 bdrv_iostatus_set_err(bs, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003624
3625 /* Then raise the request to stop the VM and the event.
3626 * qemu_system_vmstop_request_prepare has two effects. First,
3627 * it ensures that the STOP event always comes after the
3628 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3629 * can observe the STOP event and do a "cont" before the STOP
3630 * event is issued, the VM will not stop. In this case, vm_start()
3631 * also ensures that the STOP/RESUME pair of events is emitted.
3632 */
3633 qemu_system_vmstop_request_prepare();
Wenchao Xia5a2d2cb2014-06-18 08:43:45 +02003634 qapi_event_send_block_io_error(bdrv_get_device_name(bs),
3635 is_read ? IO_OPERATION_TYPE_READ :
3636 IO_OPERATION_TYPE_WRITE,
3637 action, &error_abort);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003638 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
3639 } else {
Wenchao Xia5a2d2cb2014-06-18 08:43:45 +02003640 qapi_event_send_block_io_error(bdrv_get_device_name(bs),
3641 is_read ? IO_OPERATION_TYPE_READ :
3642 IO_OPERATION_TYPE_WRITE,
3643 action, &error_abort);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003644 }
3645}
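/* Illustrative sketch (editorial addition, not part of block.c): the pattern a
 * device model might follow when a request fails, combining
 * bdrv_get_error_action() and bdrv_error_action() as described above. The
 * function name is hypothetical. */
static void example_handle_request_error(BlockDriverState *bs,
                                         bool is_read, int error)
{
    /* 'error' must be a positive errno value (see the assert above). */
    BlockErrorAction action = bdrv_get_error_action(bs, is_read, error);

    /* Emits BLOCK_IO_ERROR; for BLOCK_ERROR_ACTION_STOP it also requests a
     * VM stop, as implemented above. The device then completes or retries
     * the request according to 'action'. */
    bdrv_error_action(bs, action, is_read, error);
}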
3646
bellardb3380822004-03-14 21:38:54 +00003647int bdrv_is_read_only(BlockDriverState *bs)
3648{
3649 return bs->read_only;
3650}
3651
ths985a03b2007-12-24 16:10:43 +00003652int bdrv_is_sg(BlockDriverState *bs)
3653{
3654 return bs->sg;
3655}
3656
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003657int bdrv_enable_write_cache(BlockDriverState *bs)
3658{
3659 return bs->enable_write_cache;
3660}
3661
Paolo Bonzini425b0142012-06-06 00:04:52 +02003662void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3663{
3664 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003665
3666 /* so a reopen() will preserve wce */
3667 if (wce) {
3668 bs->open_flags |= BDRV_O_CACHE_WB;
3669 } else {
3670 bs->open_flags &= ~BDRV_O_CACHE_WB;
3671 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003672}
3673
bellardea2384d2004-08-01 21:59:26 +00003674int bdrv_is_encrypted(BlockDriverState *bs)
3675{
3676 if (bs->backing_hd && bs->backing_hd->encrypted)
3677 return 1;
3678 return bs->encrypted;
3679}
3680
aliguoric0f4ce72009-03-05 23:01:01 +00003681int bdrv_key_required(BlockDriverState *bs)
3682{
3683 BlockDriverState *backing_hd = bs->backing_hd;
3684
3685 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3686 return 1;
3687 return (bs->encrypted && !bs->valid_key);
3688}
3689
bellardea2384d2004-08-01 21:59:26 +00003690int bdrv_set_key(BlockDriverState *bs, const char *key)
3691{
3692 int ret;
3693 if (bs->backing_hd && bs->backing_hd->encrypted) {
3694 ret = bdrv_set_key(bs->backing_hd, key);
3695 if (ret < 0)
3696 return ret;
3697 if (!bs->encrypted)
3698 return 0;
3699 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003700 if (!bs->encrypted) {
3701 return -EINVAL;
3702 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3703 return -ENOMEDIUM;
3704 }
aliguoric0f4ce72009-03-05 23:01:01 +00003705 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003706 if (ret < 0) {
3707 bs->valid_key = 0;
3708 } else if (!bs->valid_key) {
3709 bs->valid_key = 1;
3710 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02003711 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00003712 }
aliguoric0f4ce72009-03-05 23:01:01 +00003713 return ret;
bellardea2384d2004-08-01 21:59:26 +00003714}
3715
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003716const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003717{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003718 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003719}
3720
ths5fafdf22007-09-16 21:08:06 +00003721void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003722 void *opaque)
3723{
3724 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003725 int count = 0;
3726 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003727
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003728 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003729 if (drv->format_name) {
3730 bool found = false;
3731 int i = count;
3732 while (formats && i && !found) {
3733 found = !strcmp(formats[--i], drv->format_name);
3734 }
3735
3736 if (!found) {
3737 formats = g_realloc(formats, (count + 1) * sizeof(char *));
3738 formats[count++] = drv->format_name;
3739 it(opaque, drv->format_name);
3740 }
3741 }
bellardea2384d2004-08-01 21:59:26 +00003742 }
Jeff Codye855e4f2014-04-28 18:29:54 -04003743 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003744}
3745
Benoît Canetdc364f42014-01-23 21:31:32 +01003746/* This function finds a block backend BlockDriverState by its device name */
bellardb3380822004-03-14 21:38:54 +00003747BlockDriverState *bdrv_find(const char *name)
3748{
3749 BlockDriverState *bs;
3750
Benoît Canetdc364f42014-01-23 21:31:32 +01003751 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003752 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00003753 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003754 }
bellardb3380822004-03-14 21:38:54 +00003755 }
3756 return NULL;
3757}
3758
Benoît Canetdc364f42014-01-23 21:31:32 +01003759/* This function finds a node in the BDS graph by its node name */
3760BlockDriverState *bdrv_find_node(const char *node_name)
3761{
3762 BlockDriverState *bs;
3763
3764 assert(node_name);
3765
3766 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3767 if (!strcmp(node_name, bs->node_name)) {
3768 return bs;
3769 }
3770 }
3771 return NULL;
3772}
3773
Benoît Canetc13163f2014-01-23 21:31:34 +01003774/* Put this QMP function here so it can access the static graph_bdrv_states. */
3775BlockDeviceInfoList *bdrv_named_nodes_list(void)
3776{
3777 BlockDeviceInfoList *list, *entry;
3778 BlockDriverState *bs;
3779
3780 list = NULL;
3781 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3782 entry = g_malloc0(sizeof(*entry));
3783 entry->value = bdrv_block_device_info(bs);
3784 entry->next = list;
3785 list = entry;
3786 }
3787
3788 return list;
3789}
3790
Benoît Canet12d3ba82014-01-23 21:31:35 +01003791BlockDriverState *bdrv_lookup_bs(const char *device,
3792 const char *node_name,
3793 Error **errp)
3794{
3795 BlockDriverState *bs = NULL;
3796
Benoît Canet12d3ba82014-01-23 21:31:35 +01003797 if (device) {
3798 bs = bdrv_find(device);
3799
Benoît Canetdd67fa52014-02-12 17:15:06 +01003800 if (bs) {
3801 return bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003802 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003803 }
3804
Benoît Canetdd67fa52014-02-12 17:15:06 +01003805 if (node_name) {
3806 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003807
Benoît Canetdd67fa52014-02-12 17:15:06 +01003808 if (bs) {
3809 return bs;
3810 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003811 }
3812
Benoît Canetdd67fa52014-02-12 17:15:06 +01003813 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3814 device ? device : "",
3815 node_name ? node_name : "");
3816 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003817}
3818
Markus Armbruster2f399b02010-06-02 18:55:20 +02003819BlockDriverState *bdrv_next(BlockDriverState *bs)
3820{
3821 if (!bs) {
3822 return QTAILQ_FIRST(&bdrv_states);
3823 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003824 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003825}
3826
aliguori51de9762009-03-05 23:00:43 +00003827void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00003828{
3829 BlockDriverState *bs;
3830
Benoît Canetdc364f42014-01-23 21:31:32 +01003831 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
aliguori51de9762009-03-05 23:00:43 +00003832 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00003833 }
3834}
3835
bellardea2384d2004-08-01 21:59:26 +00003836const char *bdrv_get_device_name(BlockDriverState *bs)
3837{
3838 return bs->device_name;
3839}
3840
Markus Armbrusterc8433282012-06-05 16:49:24 +02003841int bdrv_get_flags(BlockDriverState *bs)
3842{
3843 return bs->open_flags;
3844}
3845
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003846int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003847{
3848 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003849 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003850
Benoît Canetdc364f42014-01-23 21:31:32 +01003851 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003852 AioContext *aio_context = bdrv_get_aio_context(bs);
3853 int ret;
3854
3855 aio_context_acquire(aio_context);
3856 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003857 if (ret < 0 && !result) {
3858 result = ret;
3859 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003860 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003861 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003862
3863 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003864}
3865
Peter Lieven3ac21622013-06-28 12:47:42 +02003866int bdrv_has_zero_init_1(BlockDriverState *bs)
3867{
3868 return 1;
3869}
3870
Kevin Wolff2feebb2010-04-14 17:30:35 +02003871int bdrv_has_zero_init(BlockDriverState *bs)
3872{
3873 assert(bs->drv);
3874
Paolo Bonzini11212d82013-09-04 19:00:27 +02003875 /* If BS is a copy on write image, it is initialized to
3876 the contents of the base image, which may not be zeroes. */
3877 if (bs->backing_hd) {
3878 return 0;
3879 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003880 if (bs->drv->bdrv_has_zero_init) {
3881 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003882 }
3883
Peter Lieven3ac21622013-06-28 12:47:42 +02003884 /* safe default */
3885 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02003886}
3887
Peter Lieven4ce78692013-10-24 12:06:54 +02003888bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3889{
3890 BlockDriverInfo bdi;
3891
3892 if (bs->backing_hd) {
3893 return false;
3894 }
3895
3896 if (bdrv_get_info(bs, &bdi) == 0) {
3897 return bdi.unallocated_blocks_are_zero;
3898 }
3899
3900 return false;
3901}
3902
3903bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3904{
3905 BlockDriverInfo bdi;
3906
3907 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3908 return false;
3909 }
3910
3911 if (bdrv_get_info(bs, &bdi) == 0) {
3912 return bdi.can_write_zeroes_with_unmap;
3913 }
3914
3915 return false;
3916}
3917
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003918typedef struct BdrvCoGetBlockStatusData {
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003919 BlockDriverState *bs;
Miroslav Rezaninab35b2bb2013-02-13 09:09:39 +01003920 BlockDriverState *base;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003921 int64_t sector_num;
3922 int nb_sectors;
3923 int *pnum;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003924 int64_t ret;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003925 bool done;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003926} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003927
thsf58c7b32008-06-05 21:53:49 +00003928/*
3929 * Returns true iff the specified sector is present in the disk image. Drivers
3930 * not implementing the functionality are assumed to not support backing files,
3931 * hence all their sectors are reported as allocated.
3932 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003933 * If 'sector_num' is beyond the end of the disk image the return value is 0
3934 * and 'pnum' is set to 0.
3935 *
thsf58c7b32008-06-05 21:53:49 +00003936 * 'pnum' is set to the number of sectors (including and immediately following
3937 * the specified sector) that are known to be in the same
3938 * allocated/unallocated state.
3939 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003940 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
3941 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00003942 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003943static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
3944 int64_t sector_num,
3945 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00003946{
Paolo Bonzini617ccb42013-09-04 19:00:23 +02003947 int64_t length;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003948 int64_t n;
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02003949 int64_t ret, ret2;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003950
Paolo Bonzini617ccb42013-09-04 19:00:23 +02003951 length = bdrv_getlength(bs);
3952 if (length < 0) {
3953 return length;
3954 }
3955
3956 if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003957 *pnum = 0;
3958 return 0;
3959 }
3960
3961 n = bs->total_sectors - sector_num;
3962 if (n < nb_sectors) {
3963 nb_sectors = n;
3964 }
3965
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003966 if (!bs->drv->bdrv_co_get_block_status) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003967 *pnum = nb_sectors;
Kevin Wolfe88ae222014-05-06 15:25:36 +02003968 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
Paolo Bonzini918e92d2013-09-04 19:00:37 +02003969 if (bs->drv->protocol_name) {
3970 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
3971 }
3972 return ret;
thsf58c7b32008-06-05 21:53:49 +00003973 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003974
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003975 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
3976 if (ret < 0) {
Peter Lieven3e0a2332013-09-24 15:35:08 +02003977 *pnum = 0;
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003978 return ret;
3979 }
3980
Peter Lieven92bc50a2013-10-08 14:43:14 +02003981 if (ret & BDRV_BLOCK_RAW) {
3982 assert(ret & BDRV_BLOCK_OFFSET_VALID);
3983 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3984 *pnum, pnum);
3985 }
3986
Kevin Wolfe88ae222014-05-06 15:25:36 +02003987 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
3988 ret |= BDRV_BLOCK_ALLOCATED;
3989 }
3990
Peter Lievenc3d86882013-10-24 12:07:04 +02003991 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
3992 if (bdrv_unallocated_blocks_are_zero(bs)) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02003993 ret |= BDRV_BLOCK_ZERO;
Peter Lieven1f9db222013-09-24 15:35:09 +02003994 } else if (bs->backing_hd) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02003995 BlockDriverState *bs2 = bs->backing_hd;
3996 int64_t length2 = bdrv_getlength(bs2);
3997 if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
3998 ret |= BDRV_BLOCK_ZERO;
3999 }
4000 }
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004001 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004002
4003 if (bs->file &&
4004 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
4005 (ret & BDRV_BLOCK_OFFSET_VALID)) {
4006 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4007 *pnum, pnum);
4008 if (ret2 >= 0) {
4009 /* Ignore errors. This is just providing extra information, it
4010 * is useful but not necessary.
4011 */
4012 ret |= (ret2 & BDRV_BLOCK_ZERO);
4013 }
4014 }
4015
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004016 return ret;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004017}
4018
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004019/* Coroutine wrapper for bdrv_get_block_status() */
4020static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004021{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004022 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004023 BlockDriverState *bs = data->bs;
4024
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004025 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4026 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004027 data->done = true;
4028}
4029
4030/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004031 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004032 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004033 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004034 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004035int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4036 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004037{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004038 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004039 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004040 .bs = bs,
4041 .sector_num = sector_num,
4042 .nb_sectors = nb_sectors,
4043 .pnum = pnum,
4044 .done = false,
4045 };
4046
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004047 if (qemu_in_coroutine()) {
4048 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004049 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004050 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004051 AioContext *aio_context = bdrv_get_aio_context(bs);
4052
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004053 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004054 qemu_coroutine_enter(co, &data);
4055 while (!data.done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004056 aio_poll(aio_context, true);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004057 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004058 }
4059 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00004060}
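
/* Illustrative sketch (editor's addition, not part of the original file):
 * one way a caller might walk an image with bdrv_get_block_status() and
 * interpret the BDRV_BLOCK_* flags documented above. Wrapped in #if 0 so it
 * is not built; the function name, chunk size and output format are
 * arbitrary.
 */
#if 0
static void example_dump_block_status(BlockDriverState *bs)
{
    int64_t total = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    int64_t sector_num = 0;

    while (sector_num < total) {
        int nb = total - sector_num > 65536 ? 65536 : total - sector_num;
        int pnum;
        int64_t ret = bdrv_get_block_status(bs, sector_num, nb, &pnum);

        if (ret < 0 || pnum == 0) {
            break;
        }
        printf("sectors %" PRId64 "+%d: data=%d zero=%d offset_valid=%d\n",
               sector_num, pnum,
               !!(ret & BDRV_BLOCK_DATA),
               !!(ret & BDRV_BLOCK_ZERO),
               !!(ret & BDRV_BLOCK_OFFSET_VALID));
        sector_num += pnum;
    }
}
#endif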
4061
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004062int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4063 int nb_sectors, int *pnum)
4064{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004065 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4066 if (ret < 0) {
4067 return ret;
4068 }
Kevin Wolfe88ae222014-05-06 15:25:36 +02004069 return (ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004070}
4071
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004072/*
4073 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4074 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (TOP included, BASE excluded). BASE can be NULL to check
 * if the given sector is allocated in any image of the chain. Return
 * false otherwise.
4078 *
4079 * 'pnum' is set to the number of sectors (including and immediately following
4080 * the specified sector) that are known to be in the same
4081 * allocated/unallocated state.
4082 *
4083 */
Paolo Bonzini4f578632013-09-04 19:00:24 +02004084int bdrv_is_allocated_above(BlockDriverState *top,
4085 BlockDriverState *base,
4086 int64_t sector_num,
4087 int nb_sectors, int *pnum)
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004088{
4089 BlockDriverState *intermediate;
4090 int ret, n = nb_sectors;
4091
4092 intermediate = top;
4093 while (intermediate && intermediate != base) {
4094 int pnum_inter;
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004095 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4096 &pnum_inter);
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004097 if (ret < 0) {
4098 return ret;
4099 } else if (ret) {
4100 *pnum = pnum_inter;
4101 return 1;
4102 }
4103
4104 /*
         * [sector_num, nb_sectors] is unallocated on top but an intermediate
         * image might have
         *
         * [sector_num+x, nb_sectors-x] allocated.
4109 */
Vishvananda Ishaya63ba17d2013-01-24 10:02:08 -08004110 if (n > pnum_inter &&
4111 (intermediate == top ||
4112 sector_num + pnum_inter < intermediate->total_sectors)) {
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004113 n = pnum_inter;
4114 }
4115
4116 intermediate = intermediate->backing_hd;
4117 }
4118
4119 *pnum = n;
4120 return 0;
4121}
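
/* Illustrative sketch (editor's addition, not part of the original file):
 * using bdrv_is_allocated_above() to test whether any image between 'top'
 * and 'base' (base excluded) still holds data for a sector range, e.g.
 * before deciding that the range can be read from 'base' alone. Wrapped in
 * #if 0; the function name is arbitrary.
 */
#if 0
static bool example_range_in_top_layers(BlockDriverState *top,
                                        BlockDriverState *base,
                                        int64_t sector_num, int nb_sectors)
{
    while (nb_sectors > 0) {
        int pnum;
        int ret = bdrv_is_allocated_above(top, base, sector_num, nb_sectors,
                                          &pnum);
        if (ret < 0) {
            return false;   /* treat errors as "don't know" */
        }
        if (ret) {
            return true;    /* at least part of the range is allocated */
        }
        sector_num += pnum;
        nb_sectors -= pnum;
    }
    return false;
}
#endif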
4122
aliguori045df332009-03-05 23:00:48 +00004123const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4124{
4125 if (bs->backing_hd && bs->backing_hd->encrypted)
4126 return bs->backing_file;
4127 else if (bs->encrypted)
4128 return bs->filename;
4129 else
4130 return NULL;
4131}
4132
ths5fafdf22007-09-16 21:08:06 +00004133void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00004134 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00004135{
Kevin Wolf3574c602011-10-26 11:02:11 +02004136 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00004137}
4138
ths5fafdf22007-09-16 21:08:06 +00004139int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00004140 const uint8_t *buf, int nb_sectors)
4141{
4142 BlockDriver *drv = bs->drv;
4143 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004144 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004145 if (!drv->bdrv_write_compressed)
4146 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02004147 if (bdrv_check_request(bs, sector_num, nb_sectors))
4148 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004149
Fam Zhenge4654d22013-11-13 18:29:43 +08004150 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004151
bellardfaea38e2006-08-05 21:31:00 +00004152 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4153}
ths3b46e622007-09-17 08:09:54 +00004154
bellardfaea38e2006-08-05 21:31:00 +00004155int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4156{
4157 BlockDriver *drv = bs->drv;
4158 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004159 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004160 if (!drv->bdrv_get_info)
4161 return -ENOTSUP;
4162 memset(bdi, 0, sizeof(*bdi));
4163 return drv->bdrv_get_info(bs, bdi);
4164}
4165
Max Reitzeae041f2013-10-09 10:46:16 +02004166ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4167{
4168 BlockDriver *drv = bs->drv;
4169 if (drv && drv->bdrv_get_specific_info) {
4170 return drv->bdrv_get_specific_info(bs);
4171 }
4172 return NULL;
4173}
4174
Christoph Hellwig45566e92009-07-10 23:11:57 +02004175int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4176 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004177{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004178 QEMUIOVector qiov;
4179 struct iovec iov = {
4180 .iov_base = (void *) buf,
4181 .iov_len = size,
4182 };
4183
4184 qemu_iovec_init_external(&qiov, &iov, 1);
4185 return bdrv_writev_vmstate(bs, &qiov, pos);
4186}
4187
4188int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4189{
aliguori178e08a2009-04-05 19:10:55 +00004190 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004191
4192 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004193 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004194 } else if (drv->bdrv_save_vmstate) {
4195 return drv->bdrv_save_vmstate(bs, qiov, pos);
4196 } else if (bs->file) {
4197 return bdrv_writev_vmstate(bs->file, qiov, pos);
4198 }
4199
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004200 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004201}
4202
Christoph Hellwig45566e92009-07-10 23:11:57 +02004203int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4204 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004205{
4206 BlockDriver *drv = bs->drv;
4207 if (!drv)
4208 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004209 if (drv->bdrv_load_vmstate)
4210 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4211 if (bs->file)
4212 return bdrv_load_vmstate(bs->file, buf, pos, size);
4213 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004214}
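
/* Illustrative sketch (editor's addition, not part of the original file):
 * a round trip through the vmstate helpers above. This only works on
 * formats that implement vmstate storage (e.g. qcow2); the offsets and
 * buffer contents are arbitrary. Wrapped in #if 0.
 */
#if 0
static int example_vmstate_roundtrip(BlockDriverState *bs)
{
    uint8_t out[512] = { 0xaa };
    uint8_t in[512];
    int ret;

    ret = bdrv_save_vmstate(bs, out, 0, sizeof(out));
    if (ret < 0) {
        return ret;
    }
    ret = bdrv_load_vmstate(bs, in, 0, sizeof(in));
    if (ret < 0) {
        return ret;
    }
    return memcmp(out, in, sizeof(out)) == 0 ? 0 : -EIO;
}
#endif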
4215
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004216void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4217{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004218 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004219 return;
4220 }
4221
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004222 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004223}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004224
Kevin Wolf41c695c2012-12-06 14:32:58 +01004225int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4226 const char *tag)
4227{
4228 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4229 bs = bs->file;
4230 }
4231
4232 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4233 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4234 }
4235
4236 return -ENOTSUP;
4237}
4238
Fam Zheng4cc70e92013-11-20 10:01:54 +08004239int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4240{
4241 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4242 bs = bs->file;
4243 }
4244
4245 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4246 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4247 }
4248
4249 return -ENOTSUP;
4250}
4251
Kevin Wolf41c695c2012-12-06 14:32:58 +01004252int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4253{
Max Reitz938789e2014-03-10 23:44:08 +01004254 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004255 bs = bs->file;
4256 }
4257
4258 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4259 return bs->drv->bdrv_debug_resume(bs, tag);
4260 }
4261
4262 return -ENOTSUP;
4263}
4264
4265bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4266{
4267 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4268 bs = bs->file;
4269 }
4270
4271 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4272 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4273 }
4274
4275 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004276}
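
/* Illustrative sketch (editor's addition, not part of the original file):
 * driving the breakpoint helpers above. It assumes some layer of the chain
 * is a blkdebug BDS and that "flush_to_os" is a valid blkdebug event name;
 * the tag string is arbitrary. Wrapped in #if 0.
 */
#if 0
static void example_debug_breakpoint(BlockDriverState *bs)
{
    if (bdrv_debug_breakpoint(bs, "flush_to_os", "example-tag") < 0) {
        return;     /* no blkdebug layer in this chain */
    }

    /* ... issue I/O; the request that triggers the event is suspended ... */

    if (bdrv_debug_is_suspended(bs, "example-tag")) {
        bdrv_debug_resume(bs, "example-tag");
    }
    bdrv_debug_remove_breakpoint(bs, "example-tag");
}
#endif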
4277
Blue Swirl199630b2010-07-25 20:49:34 +00004278int bdrv_is_snapshot(BlockDriverState *bs)
4279{
4280 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4281}
4282
/* backing_file can be relative, absolute, or a protocol. If it is
4284 * relative, it must be relative to the chain. So, passing in bs->filename
4285 * from a BDS as backing_file should not be done, as that may be relative to
4286 * the CWD rather than the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004287BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4288 const char *backing_file)
4289{
Jeff Codyb1b1d782012-10-16 15:49:09 -04004290 char *filename_full = NULL;
4291 char *backing_file_full = NULL;
4292 char *filename_tmp = NULL;
4293 int is_protocol = 0;
4294 BlockDriverState *curr_bs = NULL;
4295 BlockDriverState *retval = NULL;
4296
4297 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004298 return NULL;
4299 }
4300
Jeff Codyb1b1d782012-10-16 15:49:09 -04004301 filename_full = g_malloc(PATH_MAX);
4302 backing_file_full = g_malloc(PATH_MAX);
4303 filename_tmp = g_malloc(PATH_MAX);
4304
4305 is_protocol = path_has_protocol(backing_file);
4306
4307 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4308
4309 /* If either of the filename paths is actually a protocol, then
4310 * compare unmodified paths; otherwise make paths relative */
4311 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4312 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4313 retval = curr_bs->backing_hd;
4314 break;
4315 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004316 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04004317 /* If not an absolute filename path, make it relative to the current
4318 * image's filename path */
4319 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4320 backing_file);
4321
4322 /* We are going to compare absolute pathnames */
4323 if (!realpath(filename_tmp, filename_full)) {
4324 continue;
4325 }
4326
4327 /* We need to make sure the backing filename we are comparing against
4328 * is relative to the current image filename (or absolute) */
4329 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4330 curr_bs->backing_file);
4331
4332 if (!realpath(filename_tmp, backing_file_full)) {
4333 continue;
4334 }
4335
4336 if (strcmp(backing_file_full, filename_full) == 0) {
4337 retval = curr_bs->backing_hd;
4338 break;
4339 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004340 }
4341 }
4342
Jeff Codyb1b1d782012-10-16 15:49:09 -04004343 g_free(filename_full);
4344 g_free(backing_file_full);
4345 g_free(filename_tmp);
4346 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004347}
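
/* Illustrative sketch (editor's addition, not part of the original file):
 * looking up an image in bs's backing chain by the name recorded in its
 * overlay, following the resolution rules described above. The file name
 * is made up. Wrapped in #if 0.
 */
#if 0
static void example_find_backing(BlockDriverState *bs)
{
    /* "base.qcow2" is resolved against each image in the chain, not
     * against the current working directory. */
    BlockDriverState *base = bdrv_find_backing_image(bs, "base.qcow2");
    if (base) {
        printf("found backing image: %s\n", base->filename);
    }
}
#endif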
4348
Benoît Canetf198fd12012-08-02 10:22:47 +02004349int bdrv_get_backing_file_depth(BlockDriverState *bs)
4350{
4351 if (!bs->drv) {
4352 return 0;
4353 }
4354
4355 if (!bs->backing_hd) {
4356 return 0;
4357 }
4358
4359 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4360}
4361
Jeff Cody79fac562012-09-27 13:29:15 -04004362BlockDriverState *bdrv_find_base(BlockDriverState *bs)
4363{
4364 BlockDriverState *curr_bs = NULL;
4365
4366 if (!bs) {
4367 return NULL;
4368 }
4369
4370 curr_bs = bs;
4371
4372 while (curr_bs->backing_hd) {
4373 curr_bs = curr_bs->backing_hd;
4374 }
4375 return curr_bs;
4376}
4377
bellard83f64092006-08-01 16:21:11 +00004378/**************************************************************/
4379/* async I/Os */
4380
aliguori3b69e4b2009-01-22 16:59:24 +00004381BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00004382 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00004383 BlockDriverCompletionFunc *cb, void *opaque)
4384{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004385 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4386
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004387 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004388 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004389}
4390
aliguorif141eaf2009-04-07 18:43:24 +00004391BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4392 QEMUIOVector *qiov, int nb_sectors,
4393 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004394{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004395 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4396
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004397 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004398 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004399}
4400
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004401BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4402 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4403 BlockDriverCompletionFunc *cb, void *opaque)
4404{
4405 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4406
4407 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4408 BDRV_REQ_ZERO_WRITE | flags,
4409 cb, opaque, true);
4410}
4411
Kevin Wolf40b4f532009-09-09 17:53:37 +02004412
4413typedef struct MultiwriteCB {
4414 int error;
4415 int num_requests;
4416 int num_callbacks;
4417 struct {
4418 BlockDriverCompletionFunc *cb;
4419 void *opaque;
4420 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004421 } callbacks[];
4422} MultiwriteCB;
4423
4424static void multiwrite_user_cb(MultiwriteCB *mcb)
4425{
4426 int i;
4427
4428 for (i = 0; i < mcb->num_callbacks; i++) {
4429 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004430 if (mcb->callbacks[i].free_qiov) {
4431 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4432 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004433 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004434 }
4435}
4436
4437static void multiwrite_cb(void *opaque, int ret)
4438{
4439 MultiwriteCB *mcb = opaque;
4440
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004441 trace_multiwrite_cb(mcb, ret);
4442
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004443 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004444 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004445 }
4446
4447 mcb->num_requests--;
4448 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004449 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004450 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004451 }
4452}
4453
4454static int multiwrite_req_compare(const void *a, const void *b)
4455{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004456 const BlockRequest *req1 = a, *req2 = b;
4457
4458 /*
4459 * Note that we can't simply subtract req2->sector from req1->sector
4460 * here as that could overflow the return value.
4461 */
4462 if (req1->sector > req2->sector) {
4463 return 1;
4464 } else if (req1->sector < req2->sector) {
4465 return -1;
4466 } else {
4467 return 0;
4468 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004469}
4470
4471/*
4472 * Takes a bunch of requests and tries to merge them. Returns the number of
4473 * requests that remain after merging.
4474 */
4475static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4476 int num_reqs, MultiwriteCB *mcb)
4477{
4478 int i, outidx;
4479
4480 // Sort requests by start sector
4481 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4482
    // Check if adjacent requests overlap or are exactly sequential. If so,
    // combine them into a single request.
4485 outidx = 0;
4486 for (i = 1; i < num_reqs; i++) {
4487 int merge = 0;
4488 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4489
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004490 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004491 if (reqs[i].sector <= oldreq_last) {
4492 merge = 1;
4493 }
4494
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004495 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4496 merge = 0;
4497 }
4498
Kevin Wolf40b4f532009-09-09 17:53:37 +02004499 if (merge) {
4500 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004501 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004502 qemu_iovec_init(qiov,
4503 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4504
4505 // Add the first request to the merged one. If the requests are
4506 // overlapping, drop the last sectors of the first request.
4507 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004508 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004509
            // We should not need to add any zeros between the two requests
            assert(reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004512
4513 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004514 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004515
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004516 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004517 reqs[outidx].qiov = qiov;
4518
4519 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4520 } else {
4521 outidx++;
4522 reqs[outidx].sector = reqs[i].sector;
4523 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4524 reqs[outidx].qiov = reqs[i].qiov;
4525 }
4526 }
4527
4528 return outidx + 1;
4529}
4530
4531/*
4532 * Submit multiple AIO write requests at once.
4533 *
4534 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In the error case this function returns -1, and any of the
4536 * requests may or may not be submitted yet. In particular, this means that the
4537 * callback will be called for some of the requests, for others it won't. The
4538 * caller must check the error field of the BlockRequest to wait for the right
4539 * callbacks (if error != 0, no callback will be called).
4540 *
4541 * The implementation may modify the contents of the reqs array, e.g. to merge
4542 * requests. However, the fields opaque and error are left unmodified as they
4543 * are used to signal failure for a single request to the caller.
4544 */
4545int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4546{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004547 MultiwriteCB *mcb;
4548 int i;
4549
Ryan Harper301db7c2011-03-07 10:01:04 -06004550 /* don't submit writes if we don't have a medium */
4551 if (bs->drv == NULL) {
4552 for (i = 0; i < num_reqs; i++) {
4553 reqs[i].error = -ENOMEDIUM;
4554 }
4555 return -1;
4556 }
4557
Kevin Wolf40b4f532009-09-09 17:53:37 +02004558 if (num_reqs == 0) {
4559 return 0;
4560 }
4561
4562 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004563 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004564 mcb->num_requests = 0;
4565 mcb->num_callbacks = num_reqs;
4566
4567 for (i = 0; i < num_reqs; i++) {
4568 mcb->callbacks[i].cb = reqs[i].cb;
4569 mcb->callbacks[i].opaque = reqs[i].opaque;
4570 }
4571
    // Check for mergeable requests
4573 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4574
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004575 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4576
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004577 /* Run the aio requests. */
4578 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004579 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004580 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4581 reqs[i].nb_sectors, reqs[i].flags,
4582 multiwrite_cb, mcb,
4583 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004584 }
4585
4586 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004587}
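
/* Illustrative sketch (editor's addition, not part of the original file):
 * batching two writes through bdrv_aio_multiwrite() as described above.
 * The sector numbers, buffers and callback are placeholders.
 * Wrapped in #if 0.
 */
#if 0
static void example_multiwrite_cb(void *opaque, int ret)
{
    /* called once per original request, with that request's result */
}

static int example_submit_pair(BlockDriverState *bs,
                               QEMUIOVector *qiov0, QEMUIOVector *qiov1)
{
    BlockRequest reqs[2] = {
        {
            .sector     = 0,
            .nb_sectors = qiov0->size >> BDRV_SECTOR_BITS,
            .qiov       = qiov0,
            .cb         = example_multiwrite_cb,
        },
        {
            .sector     = 128,
            .nb_sectors = qiov1->size >> BDRV_SECTOR_BITS,
            .qiov       = qiov1,
            .cb         = example_multiwrite_cb,
        },
    };

    /* On failure, reqs[i].error tells the caller which requests will never
     * get a callback. */
    return bdrv_aio_multiwrite(bs, reqs, 2);
}
#endif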
4588
bellard83f64092006-08-01 16:21:11 +00004589void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004590{
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004591 acb->aiocb_info->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00004592}
4593
4594/**************************************************************/
4595/* async block device emulation */
4596
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004597typedef struct BlockDriverAIOCBSync {
4598 BlockDriverAIOCB common;
4599 QEMUBH *bh;
4600 int ret;
4601 /* vector translation state */
4602 QEMUIOVector *qiov;
4603 uint8_t *bounce;
4604 int is_write;
4605} BlockDriverAIOCBSync;
4606
4607static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
4608{
Kevin Wolfb666d232010-05-05 11:44:39 +02004609 BlockDriverAIOCBSync *acb =
4610 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03004611 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004612 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004613 qemu_aio_release(acb);
4614}
4615
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004616static const AIOCBInfo bdrv_em_aiocb_info = {
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004617 .aiocb_size = sizeof(BlockDriverAIOCBSync),
4618 .cancel = bdrv_aio_cancel_em,
4619};
4620
bellard83f64092006-08-01 16:21:11 +00004621static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004622{
pbrookce1a14d2006-08-07 02:38:06 +00004623 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004624
aliguorif141eaf2009-04-07 18:43:24 +00004625 if (!acb->is_write)
Michael Tokarev03396142012-06-07 20:17:55 +04004626 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00004627 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004628 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004629 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004630 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00004631 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00004632}
bellardbeac80c2006-06-26 20:08:57 +00004633
aliguorif141eaf2009-04-07 18:43:24 +00004634static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4635 int64_t sector_num,
4636 QEMUIOVector *qiov,
4637 int nb_sectors,
4638 BlockDriverCompletionFunc *cb,
4639 void *opaque,
4640 int is_write)
4641
bellardea2384d2004-08-01 21:59:26 +00004642{
pbrookce1a14d2006-08-07 02:38:06 +00004643 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004644
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004645 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004646 acb->is_write = is_write;
4647 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00004648 acb->bounce = qemu_blockalign(bs, qiov->size);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004649 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004650
4651 if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004652 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004653 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004654 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004655 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004656 }
4657
pbrookce1a14d2006-08-07 02:38:06 +00004658 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004659
pbrookce1a14d2006-08-07 02:38:06 +00004660 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004661}
4662
aliguorif141eaf2009-04-07 18:43:24 +00004663static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4664 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00004665 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004666{
aliguorif141eaf2009-04-07 18:43:24 +00004667 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004668}
4669
aliguorif141eaf2009-04-07 18:43:24 +00004670static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4671 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4672 BlockDriverCompletionFunc *cb, void *opaque)
4673{
4674 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4675}
4676
Kevin Wolf68485422011-06-30 10:05:46 +02004677
4678typedef struct BlockDriverAIOCBCoroutine {
4679 BlockDriverAIOCB common;
4680 BlockRequest req;
4681 bool is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004682 bool *done;
Kevin Wolf68485422011-06-30 10:05:46 +02004683 QEMUBH* bh;
4684} BlockDriverAIOCBCoroutine;
4685
4686static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
4687{
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004688 AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004689 BlockDriverAIOCBCoroutine *acb =
4690 container_of(blockacb, BlockDriverAIOCBCoroutine, common);
4691 bool done = false;
4692
4693 acb->done = &done;
4694 while (!done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004695 aio_poll(aio_context, true);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004696 }
Kevin Wolf68485422011-06-30 10:05:46 +02004697}
4698
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004699static const AIOCBInfo bdrv_em_co_aiocb_info = {
Kevin Wolf68485422011-06-30 10:05:46 +02004700 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
4701 .cancel = bdrv_aio_co_cancel_em,
4702};
4703
Paolo Bonzini35246a62011-10-14 10:41:29 +02004704static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004705{
4706 BlockDriverAIOCBCoroutine *acb = opaque;
4707
4708 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004709
4710 if (acb->done) {
4711 *acb->done = true;
4712 }
4713
Kevin Wolf68485422011-06-30 10:05:46 +02004714 qemu_bh_delete(acb->bh);
4715 qemu_aio_release(acb);
4716}
4717
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004718/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4719static void coroutine_fn bdrv_co_do_rw(void *opaque)
4720{
4721 BlockDriverAIOCBCoroutine *acb = opaque;
4722 BlockDriverState *bs = acb->common.bs;
4723
4724 if (!acb->is_write) {
4725 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004726 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004727 } else {
4728 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004729 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004730 }
4731
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004732 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004733 qemu_bh_schedule(acb->bh);
4734}
4735
Kevin Wolf68485422011-06-30 10:05:46 +02004736static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4737 int64_t sector_num,
4738 QEMUIOVector *qiov,
4739 int nb_sectors,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004740 BdrvRequestFlags flags,
Kevin Wolf68485422011-06-30 10:05:46 +02004741 BlockDriverCompletionFunc *cb,
4742 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004743 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004744{
4745 Coroutine *co;
4746 BlockDriverAIOCBCoroutine *acb;
4747
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004748 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004749 acb->req.sector = sector_num;
4750 acb->req.nb_sectors = nb_sectors;
4751 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004752 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004753 acb->is_write = is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004754 acb->done = NULL;
Kevin Wolf68485422011-06-30 10:05:46 +02004755
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004756 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004757 qemu_coroutine_enter(co, acb);
4758
4759 return &acb->common;
4760}
4761
Paolo Bonzini07f07612011-10-17 12:32:12 +02004762static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004763{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004764 BlockDriverAIOCBCoroutine *acb = opaque;
4765 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004766
Paolo Bonzini07f07612011-10-17 12:32:12 +02004767 acb->req.error = bdrv_co_flush(bs);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004768 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004769 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004770}
4771
Paolo Bonzini07f07612011-10-17 12:32:12 +02004772BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02004773 BlockDriverCompletionFunc *cb, void *opaque)
4774{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004775 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004776
Paolo Bonzini07f07612011-10-17 12:32:12 +02004777 Coroutine *co;
4778 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004779
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004780 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004781 acb->done = NULL;
4782
Paolo Bonzini07f07612011-10-17 12:32:12 +02004783 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4784 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004785
Alexander Graf016f5cf2010-05-26 17:51:49 +02004786 return &acb->common;
4787}
4788
Paolo Bonzini4265d622011-10-17 12:32:14 +02004789static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4790{
4791 BlockDriverAIOCBCoroutine *acb = opaque;
4792 BlockDriverState *bs = acb->common.bs;
4793
4794 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004795 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004796 qemu_bh_schedule(acb->bh);
4797}
4798
4799BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4800 int64_t sector_num, int nb_sectors,
4801 BlockDriverCompletionFunc *cb, void *opaque)
4802{
4803 Coroutine *co;
4804 BlockDriverAIOCBCoroutine *acb;
4805
4806 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4807
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004808 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004809 acb->req.sector = sector_num;
4810 acb->req.nb_sectors = nb_sectors;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004811 acb->done = NULL;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004812 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4813 qemu_coroutine_enter(co, acb);
4814
4815 return &acb->common;
4816}
4817
bellardea2384d2004-08-01 21:59:26 +00004818void bdrv_init(void)
4819{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004820 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004821}
pbrookce1a14d2006-08-07 02:38:06 +00004822
Markus Armbrustereb852012009-10-27 18:41:44 +01004823void bdrv_init_with_whitelist(void)
4824{
4825 use_bdrv_whitelist = 1;
4826 bdrv_init();
4827}
4828
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004829void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004830 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004831{
pbrookce1a14d2006-08-07 02:38:06 +00004832 BlockDriverAIOCB *acb;
4833
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004834 acb = g_slice_alloc(aiocb_info->aiocb_size);
4835 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004836 acb->bs = bs;
4837 acb->cb = cb;
4838 acb->opaque = opaque;
4839 return acb;
4840}
4841
4842void qemu_aio_release(void *p)
4843{
Stefan Hajnoczid37c9752012-10-31 16:34:36 +01004844 BlockDriverAIOCB *acb = p;
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004845 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
pbrookce1a14d2006-08-07 02:38:06 +00004846}
bellard19cb3732006-08-19 11:45:59 +00004847
4848/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004849/* Coroutine block device emulation */
4850
4851typedef struct CoroutineIOCompletion {
4852 Coroutine *coroutine;
4853 int ret;
4854} CoroutineIOCompletion;
4855
4856static void bdrv_co_io_em_complete(void *opaque, int ret)
4857{
4858 CoroutineIOCompletion *co = opaque;
4859
4860 co->ret = ret;
4861 qemu_coroutine_enter(co->coroutine, NULL);
4862}
4863
4864static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4865 int nb_sectors, QEMUIOVector *iov,
4866 bool is_write)
4867{
4868 CoroutineIOCompletion co = {
4869 .coroutine = qemu_coroutine_self(),
4870 };
4871 BlockDriverAIOCB *acb;
4872
4873 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004874 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4875 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004876 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004877 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4878 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004879 }
4880
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01004881 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004882 if (!acb) {
4883 return -EIO;
4884 }
4885 qemu_coroutine_yield();
4886
4887 return co.ret;
4888}
4889
4890static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4891 int64_t sector_num, int nb_sectors,
4892 QEMUIOVector *iov)
4893{
4894 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4895}
4896
4897static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4898 int64_t sector_num, int nb_sectors,
4899 QEMUIOVector *iov)
4900{
4901 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4902}
4903
Paolo Bonzini07f07612011-10-17 12:32:12 +02004904static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004905{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004906 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004907
Paolo Bonzini07f07612011-10-17 12:32:12 +02004908 rwco->ret = bdrv_co_flush(rwco->bs);
4909}
4910
4911int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
4912{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004913 int ret;
4914
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004915 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02004916 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004917 }
4918
Kevin Wolfca716362011-11-10 18:13:59 +01004919 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004920 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004921 if (bs->drv->bdrv_co_flush_to_os) {
4922 ret = bs->drv->bdrv_co_flush_to_os(bs);
4923 if (ret < 0) {
4924 return ret;
4925 }
4926 }
4927
Kevin Wolfca716362011-11-10 18:13:59 +01004928 /* But don't actually force it to the disk with cache=unsafe */
4929 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02004930 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01004931 }
4932
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004933 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004934 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004935 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004936 } else if (bs->drv->bdrv_aio_flush) {
4937 BlockDriverAIOCB *acb;
4938 CoroutineIOCompletion co = {
4939 .coroutine = qemu_coroutine_self(),
4940 };
4941
4942 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
4943 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004944 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004945 } else {
4946 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004947 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004948 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02004949 } else {
4950 /*
4951 * Some block drivers always operate in either writethrough or unsafe
         * mode and therefore don't support bdrv_flush. Usually qemu doesn't
4953 * know how the server works (because the behaviour is hardcoded or
4954 * depends on server-side configuration), so we can't ensure that
4955 * everything is safe on disk. Returning an error doesn't work because
4956 * that would break guests even if the server operates in writethrough
4957 * mode.
4958 *
4959 * Let's hope the user knows what he's doing.
4960 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004961 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004962 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004963 if (ret < 0) {
4964 return ret;
4965 }
4966
4967 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
4968 * in the case of cache=unsafe, so there are no useless flushes.
4969 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02004970flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004971 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004972}
4973
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004974void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06004975{
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004976 Error *local_err = NULL;
4977 int ret;
4978
Kevin Wolf3456a8d2014-03-11 10:58:39 +01004979 if (!bs->drv) {
4980 return;
Anthony Liguori0f154232011-11-14 15:09:45 -06004981 }
Kevin Wolf3456a8d2014-03-11 10:58:39 +01004982
4983 if (bs->drv->bdrv_invalidate_cache) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004984 bs->drv->bdrv_invalidate_cache(bs, &local_err);
Kevin Wolf3456a8d2014-03-11 10:58:39 +01004985 } else if (bs->file) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004986 bdrv_invalidate_cache(bs->file, &local_err);
4987 }
4988 if (local_err) {
4989 error_propagate(errp, local_err);
4990 return;
Kevin Wolf3456a8d2014-03-11 10:58:39 +01004991 }
4992
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004993 ret = refresh_total_sectors(bs, bs->total_sectors);
4994 if (ret < 0) {
4995 error_setg_errno(errp, -ret, "Could not refresh total sector count");
4996 return;
4997 }
Anthony Liguori0f154232011-11-14 15:09:45 -06004998}
4999
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005000void bdrv_invalidate_cache_all(Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005001{
5002 BlockDriverState *bs;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005003 Error *local_err = NULL;
Anthony Liguori0f154232011-11-14 15:09:45 -06005004
Benoît Canetdc364f42014-01-23 21:31:32 +01005005 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005006 AioContext *aio_context = bdrv_get_aio_context(bs);
5007
5008 aio_context_acquire(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005009 bdrv_invalidate_cache(bs, &local_err);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005010 aio_context_release(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005011 if (local_err) {
5012 error_propagate(errp, local_err);
5013 return;
5014 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005015 }
5016}
5017
Benoît Canet07789262012-03-23 08:36:49 +01005018void bdrv_clear_incoming_migration_all(void)
5019{
5020 BlockDriverState *bs;
5021
Benoît Canetdc364f42014-01-23 21:31:32 +01005022 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005023 AioContext *aio_context = bdrv_get_aio_context(bs);
5024
5025 aio_context_acquire(aio_context);
Benoît Canet07789262012-03-23 08:36:49 +01005026 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005027 aio_context_release(aio_context);
Benoît Canet07789262012-03-23 08:36:49 +01005028 }
5029}
5030
Paolo Bonzini07f07612011-10-17 12:32:12 +02005031int bdrv_flush(BlockDriverState *bs)
5032{
5033 Coroutine *co;
5034 RwCo rwco = {
5035 .bs = bs,
5036 .ret = NOT_DONE,
5037 };
5038
5039 if (qemu_in_coroutine()) {
5040 /* Fast-path if already in coroutine context */
5041 bdrv_flush_co_entry(&rwco);
5042 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005043 AioContext *aio_context = bdrv_get_aio_context(bs);
5044
Paolo Bonzini07f07612011-10-17 12:32:12 +02005045 co = qemu_coroutine_create(bdrv_flush_co_entry);
5046 qemu_coroutine_enter(co, &rwco);
5047 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005048 aio_poll(aio_context, true);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005049 }
5050 }
5051
5052 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005053}
5054
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005055typedef struct DiscardCo {
5056 BlockDriverState *bs;
5057 int64_t sector_num;
5058 int nb_sectors;
5059 int ret;
5060} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005061static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5062{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005063 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005064
5065 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5066}
5067
/* If no limit is specified in the BlockLimits, use a default
5069 * of 32768 512-byte sectors (16 MiB) per request.
5070 */
5071#define MAX_DISCARD_DEFAULT 32768
5072
Paolo Bonzini4265d622011-10-17 12:32:14 +02005073int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5074 int nb_sectors)
5075{
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005076 int max_discard;
5077
Paolo Bonzini4265d622011-10-17 12:32:14 +02005078 if (!bs->drv) {
5079 return -ENOMEDIUM;
5080 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
5081 return -EIO;
5082 } else if (bs->read_only) {
5083 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005084 }
5085
Fam Zhenge4654d22013-11-13 18:29:43 +08005086 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005087
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005088 /* Do nothing if disabled. */
5089 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5090 return 0;
5091 }
5092
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005093 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005094 return 0;
5095 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005096
5097 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
5098 while (nb_sectors > 0) {
5099 int ret;
5100 int num = nb_sectors;
5101
5102 /* align request */
5103 if (bs->bl.discard_alignment &&
5104 num >= bs->bl.discard_alignment &&
5105 sector_num % bs->bl.discard_alignment) {
5106 if (num > bs->bl.discard_alignment) {
5107 num = bs->bl.discard_alignment;
5108 }
5109 num -= sector_num % bs->bl.discard_alignment;
5110 }
5111
5112 /* limit request size */
5113 if (num > max_discard) {
5114 num = max_discard;
5115 }
5116
5117 if (bs->drv->bdrv_co_discard) {
5118 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5119 } else {
5120 BlockDriverAIOCB *acb;
5121 CoroutineIOCompletion co = {
5122 .coroutine = qemu_coroutine_self(),
5123 };
5124
            acb = bs->drv->bdrv_aio_discard(bs, sector_num, num,
                                            bdrv_co_io_em_complete, &co);
5127 if (acb == NULL) {
5128 return -EIO;
5129 } else {
5130 qemu_coroutine_yield();
5131 ret = co.ret;
5132 }
5133 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005134 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005135 return ret;
5136 }
5137
5138 sector_num += num;
5139 nb_sectors -= num;
5140 }
5141 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005142}
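
/* Worked example (editor's addition, not part of the original file) of the
 * splitting above, assuming bs->bl.discard_alignment == 2048 sectors and the
 * default max_discard of 32768:
 *
 *   bdrv_co_discard(bs, 1000, 70000) issues
 *     sectors  1000 +  1048   (up to the next 2048-sector boundary)
 *     sectors  2048 + 32768   (clamped to max_discard)
 *     sectors 34816 + 32768
 *     sectors 67584 +  3416   (remainder)
 */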
5143
5144int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5145{
5146 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005147 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005148 .bs = bs,
5149 .sector_num = sector_num,
5150 .nb_sectors = nb_sectors,
5151 .ret = NOT_DONE,
5152 };
5153
5154 if (qemu_in_coroutine()) {
5155 /* Fast-path if already in coroutine context */
5156 bdrv_discard_co_entry(&rwco);
5157 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005158 AioContext *aio_context = bdrv_get_aio_context(bs);
5159
Paolo Bonzini4265d622011-10-17 12:32:14 +02005160 co = qemu_coroutine_create(bdrv_discard_co_entry);
5161 qemu_coroutine_enter(co, &rwco);
5162 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005163 aio_poll(aio_context, true);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005164 }
5165 }
5166
5167 return rwco.ret;
5168}
5169
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005170/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005171/* removable device support */
5172
5173/**
5174 * Return TRUE if the media is present
5175 */
5176int bdrv_is_inserted(BlockDriverState *bs)
5177{
5178 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005179
bellard19cb3732006-08-19 11:45:59 +00005180 if (!drv)
5181 return 0;
5182 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005183 return 1;
5184 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005185}
5186
5187/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005188 * Return whether the media changed since the last call to this
5189 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005190 */
5191int bdrv_media_changed(BlockDriverState *bs)
5192{
5193 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005194
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005195 if (drv && drv->bdrv_media_changed) {
5196 return drv->bdrv_media_changed(bs);
5197 }
5198 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005199}
5200
5201/**
5202 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5203 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005204void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005205{
5206 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005207
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005208 if (drv && drv->bdrv_eject) {
5209 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005210 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005211
5212 if (bs->device_name[0] != '\0') {
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02005213 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
5214 eject_flag, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005215 }
bellard19cb3732006-08-19 11:45:59 +00005216}
5217
bellard19cb3732006-08-19 11:45:59 +00005218/**
5219 * Lock or unlock the media (if it is locked, the user won't be able
5220 * to eject it manually).
5221 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005222void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005223{
5224 BlockDriver *drv = bs->drv;
5225
Markus Armbruster025e8492011-09-06 18:58:47 +02005226 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005227
Markus Armbruster025e8492011-09-06 18:58:47 +02005228 if (drv && drv->bdrv_lock_medium) {
5229 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005230 }
5231}
ths985a03b2007-12-24 16:10:43 +00005232
5233/* needed for generic scsi interface */
5234
5235int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5236{
5237 BlockDriver *drv = bs->drv;
5238
5239 if (drv && drv->bdrv_ioctl)
5240 return drv->bdrv_ioctl(bs, req, buf);
5241 return -ENOTSUP;
5242}
aliguori7d780662009-03-12 19:57:08 +00005243
aliguori221f7152009-03-28 17:28:41 +00005244BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5245 unsigned long int req, void *buf,
5246 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005247{
aliguori221f7152009-03-28 17:28:41 +00005248 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005249
aliguori221f7152009-03-28 17:28:41 +00005250 if (drv && drv->bdrv_aio_ioctl)
5251 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5252 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005253}
aliguorie268ca52009-04-22 20:20:00 +00005254
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005255void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005256{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005257 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005258}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005259
aliguorie268ca52009-04-22 20:20:00 +00005260void *qemu_blockalign(BlockDriverState *bs, size_t size)
5261{
Kevin Wolf339064d2013-11-28 10:23:32 +01005262 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005263}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005264
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005265/*
 * Check if all memory in this vector is aligned to the block driver's
 * required memory alignment.
5267 */
5268bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5269{
5270 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005271 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005272
5273 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005274 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005275 return false;
5276 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005277 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005278 return false;
5279 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005280 }
5281
5282 return true;
5283}
5284
Fam Zhengb8afb522014-04-16 09:34:30 +08005285BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5286 Error **errp)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005287{
5288 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005289 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005290
Paolo Bonzini50717e92013-01-21 17:09:45 +01005291 assert((granularity & (granularity - 1)) == 0);
5292
Fam Zhenge4654d22013-11-13 18:29:43 +08005293 granularity >>= BDRV_SECTOR_BITS;
5294 assert(granularity);
Fam Zhengb8afb522014-04-16 09:34:30 +08005295 bitmap_size = bdrv_getlength(bs);
5296 if (bitmap_size < 0) {
5297 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5298 errno = -bitmap_size;
5299 return NULL;
5300 }
5301 bitmap_size >>= BDRV_SECTOR_BITS;
Fam Zhenge4654d22013-11-13 18:29:43 +08005302 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
5303 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5304 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5305 return bitmap;
5306}

void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    BdrvDirtyBitmap *bm, *next;
    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
        if (bm == bitmap) {
            QLIST_REMOVE(bitmap, list);
            hbitmap_free(bitmap->bitmap);
            g_free(bitmap);
            return;
        }
    }
}

BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
{
    BdrvDirtyBitmap *bm;
    BlockDirtyInfoList *list = NULL;
    BlockDirtyInfoList **plist = &list;

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
        BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
        info->count = bdrv_get_dirty_count(bs, bm);
        info->granularity =
            ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
        entry->value = info;
        *plist = entry;
        plist = &entry->next;
    }

    return list;
}

int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
{
    if (bitmap) {
        return hbitmap_get(bitmap->bitmap, sector);
    } else {
        return 0;
    }
}

void bdrv_dirty_iter_init(BlockDriverState *bs,
                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}

void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                    int nr_sectors)
{
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
    }
}

void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
{
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
    }
}

int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
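
/*
 * Usage sketch (illustrative only): walking the dirty sectors of a bitmap
 * by pairing bdrv_dirty_iter_init() with hbitmap_iter_next(), roughly the
 * pattern used by block/mirror.c.
 *
 *     HBitmapIter hbi;
 *     int64_t sector;
 *
 *     bdrv_dirty_iter_init(bs, bitmap, &hbi);
 *     while ((sector = hbitmap_iter_next(&hbi)) >= 0) {
 *         // "sector" is the next dirty sector; copy it out, then clear it
 *         bdrv_reset_dirty(bs, sector, 1);
 *     }
 */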

/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}

/* Release a previously grabbed reference to bs.
 * If the reference count drops to zero after releasing it, the
 * BlockDriverState is deleted. */
void bdrv_unref(BlockDriverState *bs)
{
    assert(bs->refcnt > 0);
    if (--bs->refcnt == 0) {
        bdrv_delete(bs);
    }
}
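
/*
 * Usage sketch (illustrative only): a caller that needs a BlockDriverState
 * to stay alive across an operation that might delete it brackets the
 * critical section with a reference.  The helper name is a placeholder.
 *
 *     bdrv_ref(bs);
 *     do_something_that_may_drop_the_device(bs);   // hypothetical helper
 *     bdrv_unref(bs);
 */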

struct BdrvOpBlocker {
    Error *reason;
    QLIST_ENTRY(BdrvOpBlocker) list;
};

bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
        blocker = QLIST_FIRST(&bs->op_blockers[op]);
        if (errp) {
            error_setg(errp, "Device '%s' is busy: %s",
                       bs->device_name, error_get_pretty(blocker->reason));
        }
        return true;
    }
    return false;
}

void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    blocker = g_malloc0(sizeof(BdrvOpBlocker));
    blocker->reason = reason;
    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
}

void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker, *next;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
        if (blocker->reason == reason) {
            QLIST_REMOVE(blocker, list);
            g_free(blocker);
        }
    }
}

void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_block(bs, i, reason);
    }
}

void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_unblock(bs, i, reason);
    }
}

bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
{
    int i;

    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        if (!QLIST_EMPTY(&bs->op_blockers[i])) {
            return false;
        }
    }
    return true;
}
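
/*
 * Usage sketch (illustrative only): how a long-running user such as a
 * block job can pin a device with the op blocker API, and how other code
 * checks the blocker before starting a conflicting operation.  The reason
 * string and the BLOCK_OP_TYPE_RESIZE check are example choices.
 *
 *     Error *blocker = NULL;
 *
 *     error_setg(&blocker, "block device is in use by an example job");
 *     bdrv_op_block_all(bs, blocker);
 *
 *     // elsewhere, before starting another operation on the same device:
 *     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, errp)) {
 *         return;
 *     }
 *
 *     // when the job finishes:
 *     bdrv_op_unblock_all(bs, blocker);
 *     error_free(blocker);
 */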

void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}

void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}

void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}

void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    assert(bdrv_iostatus_is_enabled(bs));
    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
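
/*
 * Usage sketch (illustrative only): the typical life cycle of the I/O
 * status as driven by device setup and error handling.  "ret" is assumed
 * to be a negative errno returned by a failed request.
 *
 *     bdrv_iostatus_enable(bs);                 // when the drive is set up
 *
 *     // on a failed request, when the error policy stops the VM:
 *     if (bdrv_iostatus_is_enabled(bs)) {
 *         bdrv_iostatus_set_err(bs, -ret);      // e.g. ENOSPC or EIO
 *     }
 *
 *     // after the user corrects the problem and resumes the VM:
 *     bdrv_iostatus_reset(bs);
 */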

void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}

void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
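
/*
 * Usage sketch (illustrative only): a device model brackets a guest
 * request with the accounting hooks.  "nb_sectors" is a placeholder for
 * the request size known to the caller.
 *
 *     BlockAcctCookie cookie;
 *
 *     bdrv_acct_start(bs, &cookie, nb_sectors * BDRV_SECTOR_SIZE,
 *                     BDRV_ACCT_READ);
 *     // ... submit the read and handle its completion ...
 *     bdrv_acct_done(bs, &cookie);
 */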

void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true);
    if (!proto_drv) {
        error_setg(errp, "Unknown protocol '%s'", filename);
        return;
    }

    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
    if (options) {
        if (qemu_opts_do_parse(opts, options, NULL) != 0) {
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt) {
        backing_drv = bdrv_find_format(backing_fmt);
        if (!backing_drv) {
            error_setg(errp, "Unknown backing file format '%s'",
                       backing_fmt);
            goto out;
        }
    }

    /* The size for the image must always be specified, with one exception:
     * If we are using a backing file, we can obtain the size from there */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            uint64_t size;
            int back_flags;

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            bs = NULL;
            ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
                            backing_drv, &local_err);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Could not open '%s': %s",
                                 backing_file,
                                 error_get_pretty(local_err));
                error_free(local_err);
                local_err = NULL;
                goto out;
            }
            bdrv_get_geometry(bs, &size);
            size *= 512;

            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts);
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        error_free(local_err);
        local_err = NULL;
    }

out:
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
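
/*
 * Usage sketch (illustrative only): creating a 1 GB qcow2 image, roughly
 * how qemu-img and the QMP layer drive this function.  The filename and
 * size are example values.
 *
 *     Error *local_err = NULL;
 *
 *     bdrv_img_create("/tmp/example.qcow2", "qcow2",
 *                     NULL, NULL,            // no backing file or format
 *                     NULL,                  // no extra -o options
 *                     1 * 1024 * 1024 * 1024, 0, &local_err, true);
 *     if (local_err) {
 *         error_report("%s", error_get_pretty(local_err));
 *         error_free(local_err);
 *     }
 */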

AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}

void bdrv_detach_aio_context(BlockDriverState *bs)
{
    if (!bs->drv) {
        return;
    }

    if (bs->io_limits_enabled) {
        throttle_detach_aio_context(&bs->throttle_state);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file);
    }
    if (bs->backing_hd) {
        bdrv_detach_aio_context(bs->backing_hd);
    }

    bs->aio_context = NULL;
}

void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing_hd) {
        bdrv_attach_aio_context(bs->backing_hd, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    if (bs->io_limits_enabled) {
        throttle_attach_aio_context(&bs->throttle_state, new_context);
    }
}

void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
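
/*
 * Usage sketch (illustrative only): handing a device off to an IOThread
 * and then touching it safely from the main loop afterwards.  "iothread"
 * is assumed to come from the -object iothread,... machinery.
 *
 *     AioContext *ctx = iothread_get_aio_context(iothread);
 *
 *     bdrv_set_aio_context(bs, ctx);
 *
 *     // later, any main-loop code touching bs must hold its AioContext:
 *     aio_context_acquire(bdrv_get_aio_context(bs));
 *     bdrv_flush(bs);
 *     aio_context_release(bdrv_get_aio_context(bs));
 */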

void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
{
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}
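
/*
 * Usage sketch (illustrative only): registering a before-write notifier,
 * in the spirit of the backup job's copy-on-write hook.  The callback's
 * opaque pointer is the in-flight request; returning a negative errno
 * fails the guest write.  The function and variable names are placeholders.
 *
 *     static int example_before_write(NotifierWithReturn *notifier,
 *                                     void *opaque)
 *     {
 *         // opaque points at the tracked write request
 *         return 0;   // allow the write to proceed
 *     }
 *
 *     static NotifierWithReturn example_notifier = {
 *         .notify = example_before_write,
 *     };
 *
 *     bdrv_add_before_write_notifier(bs, &example_notifier);
 */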

int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
{
    if (!bs->drv->bdrv_amend_options) {
        return -ENOTSUP;
    }
    return bs->drv->bdrv_amend_options(bs, opts);
}
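
/*
 * Usage sketch (illustrative only): amending an option on an open image,
 * roughly what "qemu-img amend -o ..." boils down to.  The lazy_refcounts
 * option is qcow2-specific and only an example.
 *
 *     QemuOpts *amend_opts;
 *     int ret;
 *
 *     amend_opts = qemu_opts_create(bs->drv->create_opts, NULL, 0,
 *                                   &error_abort);
 *     qemu_opt_set(amend_opts, BLOCK_OPT_LAZY_REFCOUNTS, "on");
 *     ret = bdrv_amend_options(bs, amend_opts);
 *     qemu_opts_del(amend_opts);
 */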

/* This function is called by the bdrv_recurse_is_first_non_filter method
 * of block filter drivers and by bdrv_is_first_non_filter.
 * It tests whether the given bs is the candidate, or recurses further into
 * the node graph.
 */
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
                                      BlockDriverState *candidate)
{
    /* return false if basic checks fail */
    if (!bs || !bs->drv) {
        return false;
    }

    /* We reached a non block filter driver -> check whether bs is the same
     * as the candidate. This is the recursion termination condition.
     */
    if (!bs->drv->is_filter) {
        return bs == candidate;
    }
    /* From here on the driver is a block filter driver */

    /* If the block filter defines the recursion method, use it to recurse
     * down the node graph.
     */
    if (bs->drv->bdrv_recurse_is_first_non_filter) {
        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
    }

    /* The driver is a block filter but does not allow recursion -> return
     * false
     */
    return false;
}

/* This function checks whether the candidate is the first non-filter bs
 * down its bs chain. Since we don't have pointers to parents, it explores
 * all bs chains from the top. Some filters can choose not to pass down the
 * recursion.
 */
bool bdrv_is_first_non_filter(BlockDriverState *candidate)
{
    BlockDriverState *bs;

    /* walk down the bs forest recursively */
    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        bool perm;

        /* try to recurse in this top level bs */
        perm = bdrv_recurse_is_first_non_filter(bs, candidate);

        /* candidate is the first non filter */
        if (perm) {
            return true;
        }
    }

    return false;
}
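
/*
 * Usage sketch (illustrative only): how a block filter driver might
 * implement the recursion hook so that bdrv_is_first_non_filter() can see
 * through it; blkverify and quorum provide real implementations.  The
 * ExampleFilterState type, its secondary_child field and the driver name
 * are hypothetical.
 *
 *     static bool example_recurse_is_first_non_filter(BlockDriverState *bs,
 *                                                     BlockDriverState *candidate)
 *     {
 *         ExampleFilterState *s = bs->opaque;
 *
 *         // forward the question to every child this filter sits on top of
 *         return bdrv_recurse_is_first_non_filter(bs->file, candidate) ||
 *                bdrv_recurse_is_first_non_filter(s->secondary_child, candidate);
 *     }
 *
 *     static BlockDriver bdrv_example_filter = {
 *         .format_name                      = "example-filter",
 *         .is_filter                        = true,
 *         .bdrv_recurse_is_first_non_filter = example_recurse_is_first_non_filter,
 *     };
 */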