blob: 9fee0d28ed3a5e8380371919186da63b9d06926c [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010027#include "block/block_int.h"
28#include "block/blockjob.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010029#include "qemu/module.h"
Paolo Bonzini7b1b5d12012-12-17 18:19:43 +010030#include "qapi/qmp/qjson.h"
Markus Armbrusterbfb197e2014-10-07 13:59:11 +020031#include "sysemu/block-backend.h"
Paolo Bonzini9c17d612012-12-17 18:20:04 +010032#include "sysemu/sysemu.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010033#include "qemu/notify.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010034#include "block/coroutine.h"
Benoît Canetc13163f2014-01-23 21:31:34 +010035#include "block/qapi.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030036#include "qmp-commands.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010037#include "qemu/timer.h"
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +020038#include "qapi-event.h"
bellardfc01f7e2003-06-30 10:03:06 +000039
Juan Quintela71e72a12009-07-27 16:12:56 +020040#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000041#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000044#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000045#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000046#include <sys/disk.h>
47#endif
blueswir1c5e97232009-03-07 20:06:23 +000048#endif
bellard7674e7b2005-04-26 21:59:26 +000049
aliguori49dc7682009-03-08 16:26:59 +000050#ifdef _WIN32
51#include <windows.h>
52#endif
53
/* Tracks "dirty" (written) regions of a BlockDriverState via an HBitmap.
 * Instances are kept on the bs->dirty_bitmaps list (see bdrv_new()). */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;                    /* underlying hierarchical bitmap */
    QLIST_ENTRY(BdrvDirtyBitmap) list;  /* link in bs->dirty_bitmaps */
};
58
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010059#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
60
Stefan Hajnoczi2a871512014-07-07 15:15:53 +020061#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
62
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020063static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020064static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000065 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020066 BlockCompletionFunc *cb, void *opaque);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020067static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000068 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020069 BlockCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020070static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
73static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
74 int64_t sector_num, int nb_sectors,
75 QEMUIOVector *iov);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010076static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000078 BdrvRequestFlags flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010079static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
80 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000081 BdrvRequestFlags flags);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020082static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
83 int64_t sector_num,
84 QEMUIOVector *qiov,
85 int nb_sectors,
86 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +020087 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020088 void *opaque,
89 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010090static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +010091static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +020092 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellardec530c82006-04-25 22:36:06 +000093
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010094static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000096
Benoît Canetdc364f42014-01-23 21:31:32 +010097static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
98 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
99
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100100static QLIST_HEAD(, BlockDriver) bdrv_drivers =
101 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +0000102
Markus Armbrustereb852012009-10-27 18:41:44 +0100103/* If non-zero, use only whitelisted block drivers */
104static int use_bdrv_whitelist;
105
#ifdef _WIN32
/* Does @filename begin with a drive-letter prefix such as "c:"? */
static int is_windows_drive_prefix(const char *filename)
{
    char first = filename[0];

    if (filename[1] != ':') {
        return 0;
    }
    return (first >= 'a' && first <= 'z') ||
           (first >= 'A' && first <= 'Z');
}

/* Does @filename name a whole Windows drive or device?  Matches bare
 * drive letters ("d:") and device paths ("\\.\..." or "//./..."). */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
125
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800126/* throttling disk I/O limits */
Benoît Canetcc0681c2013-09-02 14:14:39 +0200127void bdrv_set_io_limits(BlockDriverState *bs,
128 ThrottleConfig *cfg)
129{
130 int i;
131
132 throttle_config(&bs->throttle_state, cfg);
133
134 for (i = 0; i < 2; i++) {
135 qemu_co_enter_next(&bs->throttled_reqs[i]);
136 }
137}
138
139/* this function drain all the throttled IOs */
140static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
141{
142 bool drained = false;
143 bool enabled = bs->io_limits_enabled;
144 int i;
145
146 bs->io_limits_enabled = false;
147
148 for (i = 0; i < 2; i++) {
149 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
150 drained = true;
151 }
152 }
153
154 bs->io_limits_enabled = enabled;
155
156 return drained;
157}
158
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800159void bdrv_io_limits_disable(BlockDriverState *bs)
160{
161 bs->io_limits_enabled = false;
162
Benoît Canetcc0681c2013-09-02 14:14:39 +0200163 bdrv_start_throttled_reqs(bs);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800164
Benoît Canetcc0681c2013-09-02 14:14:39 +0200165 throttle_destroy(&bs->throttle_state);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800166}
167
Benoît Canetcc0681c2013-09-02 14:14:39 +0200168static void bdrv_throttle_read_timer_cb(void *opaque)
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800169{
170 BlockDriverState *bs = opaque;
Benoît Canetcc0681c2013-09-02 14:14:39 +0200171 qemu_co_enter_next(&bs->throttled_reqs[0]);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800172}
173
Benoît Canetcc0681c2013-09-02 14:14:39 +0200174static void bdrv_throttle_write_timer_cb(void *opaque)
175{
176 BlockDriverState *bs = opaque;
177 qemu_co_enter_next(&bs->throttled_reqs[1]);
178}
179
180/* should be called before bdrv_set_io_limits if a limit is set */
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800181void bdrv_io_limits_enable(BlockDriverState *bs)
182{
Benoît Canetcc0681c2013-09-02 14:14:39 +0200183 assert(!bs->io_limits_enabled);
184 throttle_init(&bs->throttle_state,
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +0200185 bdrv_get_aio_context(bs),
Benoît Canetcc0681c2013-09-02 14:14:39 +0200186 QEMU_CLOCK_VIRTUAL,
187 bdrv_throttle_read_timer_cb,
188 bdrv_throttle_write_timer_cb,
189 bs);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800190 bs->io_limits_enabled = true;
191}
192
Benoît Canetcc0681c2013-09-02 14:14:39 +0200193/* This function makes an IO wait if needed
194 *
195 * @nb_sectors: the number of sectors of the IO
196 * @is_write: is the IO a write
197 */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800198static void bdrv_io_limits_intercept(BlockDriverState *bs,
Kevin Wolfd5103582014-01-16 13:29:10 +0100199 unsigned int bytes,
Benoît Canetcc0681c2013-09-02 14:14:39 +0200200 bool is_write)
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800201{
Benoît Canetcc0681c2013-09-02 14:14:39 +0200202 /* does this io must wait */
203 bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800204
Benoît Canetcc0681c2013-09-02 14:14:39 +0200205 /* if must wait or any request of this type throttled queue the IO */
206 if (must_wait ||
207 !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
208 qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800209 }
210
Benoît Canetcc0681c2013-09-02 14:14:39 +0200211 /* the IO will be executed, do the accounting */
Kevin Wolfd5103582014-01-16 13:29:10 +0100212 throttle_account(&bs->throttle_state, is_write, bytes);
213
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800214
Benoît Canetcc0681c2013-09-02 14:14:39 +0200215 /* if the next request must wait -> do nothing */
216 if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
217 return;
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800218 }
219
Benoît Canetcc0681c2013-09-02 14:14:39 +0200220 /* else queue next request for execution */
221 qemu_co_queue_next(&bs->throttled_reqs[is_write]);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800222}
223
Kevin Wolf339064d2013-11-28 10:23:32 +0100224size_t bdrv_opt_mem_align(BlockDriverState *bs)
225{
226 if (!bs || !bs->drv) {
227 /* 4k should be on the safe side */
228 return 4096;
229 }
230
231 return bs->bl.opt_mem_alignment;
232}
233
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *sep;

#ifdef _WIN32
    /* A drive letter like "c:" is a path, not a protocol. */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    sep = path + strcspn(path, ":/\\");
#else
    /* Scan up to the first ':' or path separator; a ':' seen before any
     * separator marks a protocol prefix. */
    sep = path + strcspn(path, ":/");
#endif

    return *sep == ':';
}
251
/* Is @path absolute (rather than relative to some base directory)? */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return *path == '/' || *path == '\\';
#else
    return *path == '/';
#endif
}
264
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *start, *last_sep;
    int len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Skip over a "protocol:" prefix in base_path, if any. */
    start = strchr(base_path, ':');
    start = start ? start + 1 : base_path;

    /* Find the character just after the last directory separator. */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!last_sep || bslash > last_sep) {
            last_sep = bslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;
    if (last_sep > start) {
        start = last_sep;
    }

    /* Copy the directory portion of base_path, then append filename. */
    len = start - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
308
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200309void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
310{
311 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
312 pstrcpy(dest, sz, bs->backing_file);
313 } else {
314 path_combine(dest, sz, bs->filename, bs->backing_file);
315 }
316}
317
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500318void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000319{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100320 /* Block drivers without coroutine functions need emulation */
321 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200322 bdrv->bdrv_co_readv = bdrv_co_readv_em;
323 bdrv->bdrv_co_writev = bdrv_co_writev_em;
324
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100325 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
326 * the block driver lacks aio we need to emulate that too.
327 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200328 if (!bdrv->bdrv_aio_readv) {
329 /* add AIO emulation layer */
330 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
331 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200332 }
bellard83f64092006-08-01 16:21:11 +0000333 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200334
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100335 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000336}
bellardb3380822004-03-14 21:38:54 +0000337
Markus Armbruster7f06d472014-10-07 13:59:12 +0200338BlockDriverState *bdrv_new_root(void)
bellardfc01f7e2003-06-30 10:03:06 +0000339{
Markus Armbruster7f06d472014-10-07 13:59:12 +0200340 BlockDriverState *bs = bdrv_new();
Markus Armbrustere4e99862014-10-07 13:59:03 +0200341
Markus Armbrustere4e99862014-10-07 13:59:03 +0200342 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
Markus Armbrustere4e99862014-10-07 13:59:03 +0200343 return bs;
344}
345
346BlockDriverState *bdrv_new(void)
347{
348 BlockDriverState *bs;
349 int i;
350
Markus Armbruster5839e532014-08-19 10:31:08 +0200351 bs = g_new0(BlockDriverState, 1);
Fam Zhenge4654d22013-11-13 18:29:43 +0800352 QLIST_INIT(&bs->dirty_bitmaps);
Fam Zhengfbe40ff2014-05-23 21:29:42 +0800353 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
354 QLIST_INIT(&bs->op_blockers[i]);
355 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300356 bdrv_iostatus_disable(bs);
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200357 notifier_list_init(&bs->close_notifiers);
Stefan Hajnoczid616b222013-06-24 17:13:10 +0200358 notifier_with_return_list_init(&bs->before_write_notifiers);
Benoît Canetcc0681c2013-09-02 14:14:39 +0200359 qemu_co_queue_init(&bs->throttled_reqs[0]);
360 qemu_co_queue_init(&bs->throttled_reqs[1]);
Fam Zheng9fcb0252013-08-23 09:14:46 +0800361 bs->refcnt = 1;
Stefan Hajnoczidcd04222014-05-08 16:34:37 +0200362 bs->aio_context = qemu_get_aio_context();
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200363
bellardb3380822004-03-14 21:38:54 +0000364 return bs;
365}
366
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200367void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
368{
369 notifier_list_add(&bs->close_notifiers, notify);
370}
371
bellardea2384d2004-08-01 21:59:26 +0000372BlockDriver *bdrv_find_format(const char *format_name)
373{
374 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100375 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
376 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000377 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100378 }
bellardea2384d2004-08-01 21:59:26 +0000379 }
380 return NULL;
381}
382
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800383static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100384{
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800385 static const char *whitelist_rw[] = {
386 CONFIG_BDRV_RW_WHITELIST
387 };
388 static const char *whitelist_ro[] = {
389 CONFIG_BDRV_RO_WHITELIST
Markus Armbrustereb852012009-10-27 18:41:44 +0100390 };
391 const char **p;
392
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800393 if (!whitelist_rw[0] && !whitelist_ro[0]) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100394 return 1; /* no whitelist, anything goes */
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800395 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100396
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800397 for (p = whitelist_rw; *p; p++) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100398 if (!strcmp(drv->format_name, *p)) {
399 return 1;
400 }
401 }
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800402 if (read_only) {
403 for (p = whitelist_ro; *p; p++) {
404 if (!strcmp(drv->format_name, *p)) {
405 return 1;
406 }
407 }
408 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100409 return 0;
410}
411
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800412BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
413 bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100414{
415 BlockDriver *drv = bdrv_find_format(format_name);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800416 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
Markus Armbrustereb852012009-10-27 18:41:44 +0100417}
418
/* State shared between bdrv_create() and the coroutine that performs the
 * actual image creation (bdrv_create_co_entry). */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_create() is invoked */
    char *filename;     /* g_strdup'd copy; freed by bdrv_create() */
    QemuOpts *opts;     /* creation options, owned by the caller */
    int ret;            /* NOT_DONE while the coroutine is still running */
    Error *err;         /* error from the coroutine, if any */
} CreateCo;
426
427static void coroutine_fn bdrv_create_co_entry(void *opaque)
428{
Max Reitzcc84d902013-09-06 17:14:26 +0200429 Error *local_err = NULL;
430 int ret;
431
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800432 CreateCo *cco = opaque;
433 assert(cco->drv);
434
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800435 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100436 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200437 error_propagate(&cco->err, local_err);
438 }
439 cco->ret = ret;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800440}
441
/*
 * Create an image file using driver @drv with options @opts.
 *
 * drv->bdrv_create() must run in coroutine context: if we are already in
 * one, call the entry point directly; otherwise spawn a coroutine and
 * poll the AioContext until it signals completion by replacing the
 * NOT_DONE sentinel in cco.ret.
 *
 * Returns 0 on success, a negative errno value on failure, and sets
 * @errp on failure.
 */
int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    /* Shared state between this function and bdrv_create_co_entry(). */
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        /* Drive the event loop until the coroutine stores its result. */
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        /* Prefer the coroutine's specific error; fall back to errno text. */
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}
486
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800487int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200488{
489 BlockDriver *drv;
Max Reitzcc84d902013-09-06 17:14:26 +0200490 Error *local_err = NULL;
491 int ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200492
Kevin Wolf98289622013-07-10 15:47:39 +0200493 drv = bdrv_find_protocol(filename, true);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200494 if (drv == NULL) {
Max Reitzcc84d902013-09-06 17:14:26 +0200495 error_setg(errp, "Could not find protocol for file '%s'", filename);
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000496 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200497 }
498
Chunyan Liuc282e1f2014-06-05 17:21:11 +0800499 ret = bdrv_create(drv, filename, opts, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100500 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200501 error_propagate(errp, local_err);
502 }
503 return ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200504}
505
/*
 * Recompute bs->bl (the BlockLimits) for @bs.
 *
 * Limits are rebuilt from scratch: defaults are inherited recursively
 * from the protocol file and the backing file (taking the stricter of
 * the two where both exist), then the driver's own
 * bdrv_refresh_limits() hook gets the final say.
 *
 * On error from a child, @errp is set and bs->bl is left only partially
 * refreshed.
 */
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    /* No driver attached: all-zero limits are the best we can say. */
    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        /* No protocol file: fall back to a 512-byte alignment default. */
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        /* Combine with the backing file's limits: keep the larger value. */
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}
549
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    const char *tmpdir = getenv("TMPDIR");
    int fd;

    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    /* Build the mkstemp template, rejecting names that would not fit. */
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);     /* creates the (empty) file */
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Don't leave a file behind if we cannot even close it cleanly. */
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}
bellardea2384d2004-08-01 21:59:26 +0000585
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200586/*
587 * Detect host devices. By convention, /dev/cdrom[N] is always
588 * recognized as a host CDROM.
589 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200590static BlockDriver *find_hdev_driver(const char *filename)
591{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200592 int score_max = 0, score;
593 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200594
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100595 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200596 if (d->bdrv_probe_device) {
597 score = d->bdrv_probe_device(filename);
598 if (score > score_max) {
599 score_max = score;
600 drv = d;
601 }
602 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200603 }
604
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200605 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200606}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200607
Kevin Wolf98289622013-07-10 15:47:39 +0200608BlockDriver *bdrv_find_protocol(const char *filename,
609 bool allow_protocol_prefix)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200610{
611 BlockDriver *drv1;
612 char protocol[128];
613 int len;
614 const char *p;
615
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200616 /* TODO Drivers without bdrv_file_open must be specified explicitly */
617
Christoph Hellwig39508e72010-06-23 12:25:17 +0200618 /*
619 * XXX(hch): we really should not let host device detection
620 * override an explicit protocol specification, but moving this
621 * later breaks access to device names with colons in them.
622 * Thanks to the brain-dead persistent naming schemes on udev-
623 * based Linux systems those actually are quite common.
624 */
625 drv1 = find_hdev_driver(filename);
626 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200627 return drv1;
628 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200629
Kevin Wolf98289622013-07-10 15:47:39 +0200630 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200631 return bdrv_find_format("file");
632 }
Kevin Wolf98289622013-07-10 15:47:39 +0200633
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000634 p = strchr(filename, ':');
635 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200636 len = p - filename;
637 if (len > sizeof(protocol) - 1)
638 len = sizeof(protocol) - 1;
639 memcpy(protocol, filename, len);
640 protocol[len] = '\0';
641 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
642 if (drv1->protocol_name &&
643 !strcmp(drv1->protocol_name, protocol)) {
644 return drv1;
645 }
646 }
647 return NULL;
648}
649
Kevin Wolff500a6d2012-11-12 17:35:27 +0100650static int find_image_format(BlockDriverState *bs, const char *filename,
Max Reitz34b5d2c2013-09-05 14:45:29 +0200651 BlockDriver **pdrv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000652{
Kevin Wolff500a6d2012-11-12 17:35:27 +0100653 int score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000654 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000655 uint8_t buf[2048];
Kevin Wolff500a6d2012-11-12 17:35:27 +0100656 int ret = 0;
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700657
Kevin Wolf08a00552010-06-01 18:37:31 +0200658 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
Paolo Bonzini8e895592013-01-10 15:39:27 +0100659 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200660 drv = bdrv_find_format("raw");
661 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200662 error_setg(errp, "Could not find raw image format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200663 ret = -ENOENT;
664 }
665 *pdrv = drv;
666 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700667 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700668
bellard83f64092006-08-01 16:21:11 +0000669 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
bellard83f64092006-08-01 16:21:11 +0000670 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200671 error_setg_errno(errp, -ret, "Could not read image for determining its "
672 "format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200673 *pdrv = NULL;
674 return ret;
bellard83f64092006-08-01 16:21:11 +0000675 }
676
bellardea2384d2004-08-01 21:59:26 +0000677 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200678 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100679 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000680 if (drv1->bdrv_probe) {
681 score = drv1->bdrv_probe(buf, ret, filename);
682 if (score > score_max) {
683 score_max = score;
684 drv = drv1;
685 }
bellardea2384d2004-08-01 21:59:26 +0000686 }
687 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200688 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200689 error_setg(errp, "Could not determine image format: No compatible "
690 "driver found");
Stefan Weilc98ac352010-07-21 21:51:51 +0200691 ret = -ENOENT;
692 }
693 *pdrv = drv;
694 return ret;
bellardea2384d2004-08-01 21:59:26 +0000695}
696
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100697/**
698 * Set the current 'total_sectors' value
Markus Armbruster65a9bb22014-06-26 13:23:17 +0200699 * Return 0 on success, -errno on error.
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100700 */
701static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
702{
703 BlockDriver *drv = bs->drv;
704
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700705 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
706 if (bs->sg)
707 return 0;
708
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100709 /* query actual device if possible, otherwise just trust the hint */
710 if (drv->bdrv_getlength) {
711 int64_t length = drv->bdrv_getlength(bs);
712 if (length < 0) {
713 return length;
714 }
Fam Zheng7e382002013-11-06 19:48:06 +0800715 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100716 }
717
718 bs->total_sectors = hint;
719 return 0;
720}
721
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100722/**
Paolo Bonzini9e8f1832013-02-08 14:06:11 +0100723 * Set open flags for a given discard mode
724 *
725 * Return 0 on success, -1 if the discard mode was invalid.
726 */
727int bdrv_parse_discard_flags(const char *mode, int *flags)
728{
729 *flags &= ~BDRV_O_UNMAP;
730
731 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
732 /* do nothing */
733 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
734 *flags |= BDRV_O_UNMAP;
735 } else {
736 return -1;
737 }
738
739 return 0;
740}
741
742/**
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100743 * Set open flags for a given cache mode
744 *
745 * Return 0 on success, -1 if the cache mode was invalid.
746 */
747int bdrv_parse_cache_flags(const char *mode, int *flags)
748{
749 *flags &= ~BDRV_O_CACHE_MASK;
750
751 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
752 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100753 } else if (!strcmp(mode, "directsync")) {
754 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100755 } else if (!strcmp(mode, "writeback")) {
756 *flags |= BDRV_O_CACHE_WB;
757 } else if (!strcmp(mode, "unsafe")) {
758 *flags |= BDRV_O_CACHE_WB;
759 *flags |= BDRV_O_NO_FLUSH;
760 } else if (!strcmp(mode, "writethrough")) {
761 /* this is the default */
762 } else {
763 return -1;
764 }
765
766 return 0;
767}
768
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000769/**
770 * The copy-on-read flag is actually a reference count so multiple users may
771 * use the feature without worrying about clobbering its previous state.
772 * Copy-on-read stays enabled until all users have called to disable it.
773 */
774void bdrv_enable_copy_on_read(BlockDriverState *bs)
775{
776 bs->copy_on_read++;
777}
778
779void bdrv_disable_copy_on_read(BlockDriverState *bs)
780{
781 assert(bs->copy_on_read > 0);
782 bs->copy_on_read--;
783}
784
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200785/*
Kevin Wolfb1e6fc02014-05-06 12:11:42 +0200786 * Returns the flags that a temporary snapshot should get, based on the
787 * originally requested flags (the originally requested image will have flags
788 * like a backing file)
789 */
790static int bdrv_temp_snapshot_flags(int flags)
791{
792 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
793}
794
795/*
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200796 * Returns the flags that bs->file should get, based on the given flags for
797 * the parent BDS
798 */
799static int bdrv_inherited_flags(int flags)
800{
801 /* Enable protocol handling, disable format probing for bs->file */
802 flags |= BDRV_O_PROTOCOL;
803
804 /* Our block drivers take care to send flushes and respect unmap policy,
805 * so we can enable both unconditionally on lower layers. */
806 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
807
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200808 /* Clear flags that only apply to the top layer */
Kevin Wolf5669b442014-04-11 21:36:45 +0200809 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200810
811 return flags;
812}
813
Kevin Wolf317fc442014-04-25 13:27:34 +0200814/*
815 * Returns the flags that bs->backing_hd should get, based on the given flags
816 * for the parent BDS
817 */
818static int bdrv_backing_flags(int flags)
819{
820 /* backing files always opened read-only */
821 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
822
823 /* snapshot=on is handled on the top layer */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200824 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
Kevin Wolf317fc442014-04-25 13:27:34 +0200825
826 return flags;
827}
828
Kevin Wolf7b272452012-11-12 17:05:39 +0100829static int bdrv_open_flags(BlockDriverState *bs, int flags)
830{
831 int open_flags = flags | BDRV_O_CACHE_WB;
832
833 /*
834 * Clear flags that are internal to the block layer before opening the
835 * image.
836 */
Kevin Wolf20cca272014-06-04 14:33:27 +0200837 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
Kevin Wolf7b272452012-11-12 17:05:39 +0100838
839 /*
840 * Snapshots should be writable.
841 */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200842 if (flags & BDRV_O_TEMPORARY) {
Kevin Wolf7b272452012-11-12 17:05:39 +0100843 open_flags |= BDRV_O_RDWR;
844 }
845
846 return open_flags;
847}
848
Kevin Wolf636ea372014-01-24 14:11:52 +0100849static void bdrv_assign_node_name(BlockDriverState *bs,
850 const char *node_name,
851 Error **errp)
Benoît Canet6913c0c2014-01-23 21:31:33 +0100852{
853 if (!node_name) {
Kevin Wolf636ea372014-01-24 14:11:52 +0100854 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100855 }
856
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200857 /* Check for empty string or invalid characters */
Markus Armbrusterf5bebbb2014-09-30 13:59:30 +0200858 if (!id_wellformed(node_name)) {
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200859 error_setg(errp, "Invalid node name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100860 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100861 }
862
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100863 /* takes care of avoiding namespaces collisions */
Markus Armbruster7f06d472014-10-07 13:59:12 +0200864 if (blk_by_name(node_name)) {
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100865 error_setg(errp, "node-name=%s is conflicting with a device id",
866 node_name);
Kevin Wolf636ea372014-01-24 14:11:52 +0100867 return;
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100868 }
869
Benoît Canet6913c0c2014-01-23 21:31:33 +0100870 /* takes care of avoiding duplicates node names */
871 if (bdrv_find_node(node_name)) {
872 error_setg(errp, "Duplicate node name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100873 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100874 }
875
876 /* copy node name into the bs and insert it into the graph list */
877 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
878 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
Benoît Canet6913c0c2014-01-23 21:31:33 +0100879}
880
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs     the BDS being initialized
 * @file   already-open lower layer, or NULL if @drv opens the file itself
 * @drv    the driver to open @bs with (must not be NULL)
 *
 * Returns 0 on success, -errno on failure; on failure bs->drv, bs->opaque
 * and bs->file are reset so the BDS is left closed.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* Prefer the filename of the already-open lower layer, if we have one */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* Validate and apply the optional node-name before anything else so a
     * bad name fails the open early */
    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);
    /* protocol-level nodes may grow past their current end of file */
    bs->growable = !!(flags & BDRV_O_PROTOCOL);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* distinguish "read-only only" drivers from fully unlisted ones */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                   ? "Driver '%s' can only be used for read-only devices"
                   : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            /* copy-on-read must write into the image, so it needs RDWR */
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    /* driver-private state, zero-initialized; freed again on failure below */
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* prefer the driver's own error message if it set one */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    /* roll back to a closed BDS; bs->file is owned by the caller */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
1018
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02001019static QDict *parse_json_filename(const char *filename, Error **errp)
1020{
1021 QObject *options_obj;
1022 QDict *options;
1023 int ret;
1024
1025 ret = strstart(filename, "json:", &filename);
1026 assert(ret);
1027
1028 options_obj = qobject_from_json(filename);
1029 if (!options_obj) {
1030 error_setg(errp, "Could not parse the JSON options");
1031 return NULL;
1032 }
1033
1034 if (qobject_type(options_obj) != QTYPE_QDICT) {
1035 qobject_decref(options_obj);
1036 error_setg(errp, "Invalid JSON object given");
1037 return NULL;
1038 }
1039
1040 options = qobject_to_qdict(options_obj);
1041 qdict_flatten(options);
1042
1043 return options;
1044}
1045
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 *
 * Handles the "json:" pseudo-protocol (merging its options into *options and
 * clearing *pfilename), moves a legacy filename into the "filename" option,
 * and resolves/records the driver name under the "driver" key. If @drv is
 * given it takes precedence; specifying a driver both ways is an error.
 *
 * Returns 0 on success, negative errno on failure (reported via @errp).
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        /* the filename has been fully consumed into *options */
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* remember that the filename still needs driver-specific parsing */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        /* an explicit driver argument and a "driver" option must not clash */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            /* protocol level without explicit driver: probe by filename */
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename);
                if (!drv) {
                    error_setg(errp, "Unknown protocol");
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* at the protocol level a driver must have been determined by now */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* the parsed filename is now redundant unless the driver needs it */
        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
1138
/*
 * Set, replace or clear (@backing_hd == NULL) the backing file of @bs.
 *
 * Maintains bs->backing_blocker: the old backing file (if any) is fully
 * unblocked, and a new blocker is installed on the new backing file so it
 * cannot be used directly while it serves as a backing hd (COMMIT stays
 * allowed). Also updates bs->backing_file / bs->backing_format and the
 * BDRV_O_NO_BACKING flag, and refreshes the I/O limits of @bs.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        /* release the blocker held on the previous backing file */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        /* first backing file for this BDS: create the blocker error */
        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bdrv_get_device_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        /* backing file removed: drop the blocker and just refresh limits */
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}
1169
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 *
 * Returns 0 on success (including the no-op cases: backing file already open,
 * or no backing file configured), negative errno on failure.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriver *back_drv = NULL;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    /* nothing to do if a backing file is already attached */
    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* filename comes from the options dict, not from bs->backing_file */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* no backing file configured at all: success, nothing to open */
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    /* honour an explicitly recorded backing format, if there is one */
    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    assert(bs->backing_hd == NULL);
    /* the options reference is consumed by bdrv_open() from here on */
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        /* remember the failure so we don't retry on every access */
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}
1238
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001239/*
Max Reitzda557aa2013-12-20 19:28:11 +01001240 * Opens a disk image whose options are given as BlockdevRef in another block
1241 * device's options.
1242 *
Max Reitzda557aa2013-12-20 19:28:11 +01001243 * If allow_none is true, no image will be opened if filename is false and no
1244 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1245 *
1246 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1247 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1248 * itself, all options starting with "${bdref_key}." are considered part of the
1249 * BlockdevRef.
1250 *
1251 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001252 *
1253 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001254 */
1255int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1256 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001257 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001258{
1259 QDict *image_options;
1260 int ret;
1261 char *bdref_key_dot;
1262 const char *reference;
1263
Max Reitzf67503e2014-02-18 18:33:05 +01001264 assert(pbs);
1265 assert(*pbs == NULL);
1266
Max Reitzda557aa2013-12-20 19:28:11 +01001267 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1268 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1269 g_free(bdref_key_dot);
1270
1271 reference = qdict_get_try_str(options, bdref_key);
1272 if (!filename && !reference && !qdict_size(image_options)) {
1273 if (allow_none) {
1274 ret = 0;
1275 } else {
1276 error_setg(errp, "A block device must be specified for \"%s\"",
1277 bdref_key);
1278 ret = -EINVAL;
1279 }
Markus Armbrusterb20e61e2014-05-28 11:16:57 +02001280 QDECREF(image_options);
Max Reitzda557aa2013-12-20 19:28:11 +01001281 goto done;
1282 }
1283
Max Reitzf7d9fd82014-02-18 18:33:12 +01001284 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001285
1286done:
1287 qdict_del(options, bdref_key);
1288 return ret;
1289}
1290
Chen Gang6b8aeca2014-06-23 23:28:23 +08001291int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001292{
1293 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001294 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001295 int64_t total_size;
1296 BlockDriver *bdrv_qcow2;
Chunyan Liu83d05212014-06-05 17:20:51 +08001297 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001298 QDict *snapshot_options;
1299 BlockDriverState *bs_snapshot;
1300 Error *local_err;
1301 int ret;
1302
1303 /* if snapshot, we create a temporary backing file and open it
1304 instead of opening 'filename' directly */
1305
1306 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001307 total_size = bdrv_getlength(bs);
1308 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001309 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001310 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001311 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001312 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001313
1314 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001315 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001316 if (ret < 0) {
1317 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001318 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001319 }
1320
1321 bdrv_qcow2 = bdrv_find_format("qcow2");
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001322 opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
1323 &error_abort);
Chunyan Liu83d05212014-06-05 17:20:51 +08001324 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001325 ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001326 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001327 if (ret < 0) {
1328 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1329 "'%s': %s", tmp_filename,
1330 error_get_pretty(local_err));
1331 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001332 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001333 }
1334
1335 /* Prepare a new options QDict for the temporary file */
1336 snapshot_options = qdict_new();
1337 qdict_put(snapshot_options, "file.driver",
1338 qstring_from_str("file"));
1339 qdict_put(snapshot_options, "file.filename",
1340 qstring_from_str(tmp_filename));
1341
Markus Armbrustere4e99862014-10-07 13:59:03 +02001342 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001343
1344 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001345 flags, bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001346 if (ret < 0) {
1347 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001348 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001349 }
1350
1351 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001352
1353out:
1354 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001355 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001356}
1357
Max Reitzda557aa2013-12-20 19:28:11 +01001358/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001359 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001360 *
1361 * options is a QDict of options to pass to the block drivers, or NULL for an
1362 * empty set of options. The reference to the QDict belongs to the block layer
1363 * after the call (even on failure), so if the caller intends to reuse the
1364 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001365 *
1366 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1367 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001368 *
1369 * The reference parameter may be used to specify an existing block device which
1370 * should be opened. If specified, neither options nor a filename may be given,
1371 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001372 */
Max Reitzddf56362014-02-18 18:33:06 +01001373int bdrv_open(BlockDriverState **pbs, const char *filename,
1374 const char *reference, QDict *options, int flags,
1375 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001376{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001377 int ret;
Max Reitzf67503e2014-02-18 18:33:05 +01001378 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001379 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001380 Error *local_err = NULL;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001381 int snapshot_flags = 0;
bellard712e7872005-04-28 21:09:32 +00001382
Max Reitzf67503e2014-02-18 18:33:05 +01001383 assert(pbs);
1384
Max Reitzddf56362014-02-18 18:33:06 +01001385 if (reference) {
1386 bool options_non_empty = options ? qdict_size(options) : false;
1387 QDECREF(options);
1388
1389 if (*pbs) {
1390 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1391 "another block device");
1392 return -EINVAL;
1393 }
1394
1395 if (filename || options_non_empty) {
1396 error_setg(errp, "Cannot reference an existing block device with "
1397 "additional options or a new filename");
1398 return -EINVAL;
1399 }
1400
1401 bs = bdrv_lookup_bs(reference, reference, errp);
1402 if (!bs) {
1403 return -ENODEV;
1404 }
1405 bdrv_ref(bs);
1406 *pbs = bs;
1407 return 0;
1408 }
1409
Max Reitzf67503e2014-02-18 18:33:05 +01001410 if (*pbs) {
1411 bs = *pbs;
1412 } else {
Markus Armbrustere4e99862014-10-07 13:59:03 +02001413 bs = bdrv_new();
Max Reitzf67503e2014-02-18 18:33:05 +01001414 }
1415
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001416 /* NULL means an empty set of options */
1417 if (options == NULL) {
1418 options = qdict_new();
1419 }
1420
Kevin Wolf17b005f2014-05-27 10:50:29 +02001421 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
Kevin Wolf462f5bc2014-05-26 11:39:55 +02001422 if (local_err) {
1423 goto fail;
1424 }
1425
Kevin Wolf76c591b2014-06-04 14:19:44 +02001426 /* Find the right image format driver */
1427 drv = NULL;
1428 drvname = qdict_get_try_str(options, "driver");
1429 if (drvname) {
1430 drv = bdrv_find_format(drvname);
1431 qdict_del(options, "driver");
1432 if (!drv) {
1433 error_setg(errp, "Unknown driver: '%s'", drvname);
1434 ret = -EINVAL;
1435 goto fail;
1436 }
1437 }
1438
1439 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1440 if (drv && !drv->bdrv_file_open) {
1441 /* If the user explicitly wants a format driver here, we'll need to add
1442 * another layer for the protocol in bs->file */
1443 flags &= ~BDRV_O_PROTOCOL;
1444 }
1445
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001446 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001447 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001448
Kevin Wolff4788ad2014-06-03 16:44:19 +02001449 /* Open image file without format layer */
1450 if ((flags & BDRV_O_PROTOCOL) == 0) {
1451 if (flags & BDRV_O_RDWR) {
1452 flags |= BDRV_O_ALLOW_RDWR;
1453 }
1454 if (flags & BDRV_O_SNAPSHOT) {
1455 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1456 flags = bdrv_backing_flags(flags);
1457 }
1458
1459 assert(file == NULL);
1460 ret = bdrv_open_image(&file, filename, options, "file",
1461 bdrv_inherited_flags(flags),
1462 true, &local_err);
1463 if (ret < 0) {
Max Reitz5469a2a2014-02-18 18:33:10 +01001464 goto fail;
1465 }
1466 }
1467
Kevin Wolf76c591b2014-06-04 14:19:44 +02001468 /* Image format probing */
1469 if (!drv && file) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001470 ret = find_image_format(file, filename, &drv, &local_err);
1471 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001472 goto fail;
Max Reitz2a05cbe2013-12-20 19:28:10 +01001473 }
Kevin Wolf76c591b2014-06-04 14:19:44 +02001474 } else if (!drv) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001475 error_setg(errp, "Must specify either driver or file");
1476 ret = -EINVAL;
Kevin Wolf8bfea152014-04-11 19:16:36 +02001477 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001478 }
1479
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001480 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001481 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001482 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001483 goto fail;
Christoph Hellwig69873072010-01-20 18:13:25 +01001484 }
1485
Max Reitz2a05cbe2013-12-20 19:28:10 +01001486 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001487 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001488 file = NULL;
1489 }
1490
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001491 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001492 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001493 QDict *backing_options;
1494
Benoît Canet5726d872013-09-25 13:30:01 +02001495 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001496 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001497 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001498 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001499 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001500 }
1501
Max Reitz91af7012014-07-18 20:24:56 +02001502 bdrv_refresh_filename(bs);
1503
Kevin Wolfb9988752014-04-03 12:09:34 +02001504 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1505 * temporary snapshot afterwards. */
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001506 if (snapshot_flags) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001507 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001508 if (local_err) {
Kevin Wolfb9988752014-04-03 12:09:34 +02001509 goto close_and_fail;
1510 }
1511 }
1512
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001513 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001514 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001515 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001516 if (flags & BDRV_O_PROTOCOL) {
1517 error_setg(errp, "Block protocol '%s' doesn't support the option "
1518 "'%s'", drv->format_name, entry->key);
1519 } else {
1520 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1521 "support the option '%s'", drv->format_name,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001522 bdrv_get_device_name(bs), entry->key);
Max Reitz5acd9d82014-02-18 18:33:11 +01001523 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001524
1525 ret = -EINVAL;
1526 goto close_and_fail;
1527 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001528
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001529 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001530 bdrv_dev_change_media_cb(bs, true);
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001531 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1532 && !runstate_check(RUN_STATE_INMIGRATE)
1533 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1534 error_setg(errp,
1535 "Guest must be stopped for opening of encrypted image");
1536 ret = -EBUSY;
1537 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001538 }
1539
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001540 QDECREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001541 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001542 return 0;
1543
Kevin Wolf8bfea152014-04-11 19:16:36 +02001544fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001545 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001546 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001547 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001548 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001549 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001550 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001551 if (!*pbs) {
1552 /* If *pbs is NULL, a new BDS has been created in this function and
1553 needs to be freed now. Otherwise, it does not need to be closed,
1554 since it has not really been opened yet. */
1555 bdrv_unref(bs);
1556 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001557 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001558 error_propagate(errp, local_err);
1559 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001560 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001561
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001562close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001563 /* See fail path, but now the BDS has to be always closed */
1564 if (*pbs) {
1565 bdrv_close(bs);
1566 } else {
1567 bdrv_unref(bs);
1568 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001569 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001570 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001571 error_propagate(errp, local_err);
1572 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001573 return ret;
1574}
1575
/* One pending reopen within a BlockReopenQueue: carries the staged reopen
 * state for a single BDS, plus a flag telling the commit/abort pass whether
 * bdrv_reopen_prepare() already succeeded for this entry. */
typedef struct BlockReopenQueueEntry {
    bool prepared;    /* true once bdrv_reopen_prepare() has succeeded */
    BDRVReopenState state;    /* staged flags/state for this BDS */
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;    /* link in bs_queue */
} BlockReopenQueueEntry;
1581
1582/*
1583 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1584 * reopen of multiple devices.
1585 *
1586 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1587 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1588 * be created and initialized. This newly created BlockReopenQueue should be
1589 * passed back in for subsequent calls that are intended to be of the same
1590 * atomic 'set'.
1591 *
1592 * bs is the BlockDriverState to add to the reopen queue.
1593 *
1594 * flags contains the open flags for the associated bs
1595 *
1596 * returns a pointer to bs_queue, which is either the newly allocated
1597 * bs_queue, or the existing bs_queue being used.
1598 *
1599 */
1600BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1601 BlockDriverState *bs, int flags)
1602{
1603 assert(bs != NULL);
1604
1605 BlockReopenQueueEntry *bs_entry;
1606 if (bs_queue == NULL) {
1607 bs_queue = g_new0(BlockReopenQueue, 1);
1608 QSIMPLEQ_INIT(bs_queue);
1609 }
1610
Kevin Wolff1f25a22014-04-25 19:04:55 +02001611 /* bdrv_open() masks this flag out */
1612 flags &= ~BDRV_O_PROTOCOL;
1613
Jeff Codye971aa12012-09-20 15:13:19 -04001614 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001615 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001616 }
1617
1618 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1619 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1620
1621 bs_entry->state.bs = bs;
1622 bs_entry->state.flags = flags;
1623
1624 return bs_queue;
1625}
1626
1627/*
1628 * Reopen multiple BlockDriverStates atomically & transactionally.
1629 *
1630 * The queue passed in (bs_queue) must have been built up previous
1631 * via bdrv_reopen_queue().
1632 *
1633 * Reopens all BDS specified in the queue, with the appropriate
1634 * flags. All devices are prepared for reopen, and failure of any
1635 * device will cause all device changes to be abandonded, and intermediate
1636 * data cleaned up.
1637 *
1638 * If all devices prepare successfully, then the changes are committed
1639 * to all devices.
1640 *
1641 */
1642int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1643{
1644 int ret = -1;
1645 BlockReopenQueueEntry *bs_entry, *next;
1646 Error *local_err = NULL;
1647
1648 assert(bs_queue != NULL);
1649
1650 bdrv_drain_all();
1651
1652 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1653 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1654 error_propagate(errp, local_err);
1655 goto cleanup;
1656 }
1657 bs_entry->prepared = true;
1658 }
1659
1660 /* If we reach this point, we have success and just need to apply the
1661 * changes
1662 */
1663 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1664 bdrv_reopen_commit(&bs_entry->state);
1665 }
1666
1667 ret = 0;
1668
1669cleanup:
1670 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1671 if (ret && bs_entry->prepared) {
1672 bdrv_reopen_abort(&bs_entry->state);
1673 }
1674 g_free(bs_entry);
1675 }
1676 g_free(bs_queue);
1677 return ret;
1678}
1679
1680
1681/* Reopen a single BlockDriverState with the specified flags. */
1682int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1683{
1684 int ret = -1;
1685 Error *local_err = NULL;
1686 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1687
1688 ret = bdrv_reopen_multiple(queue, &local_err);
1689 if (local_err != NULL) {
1690 error_propagate(errp, local_err);
1691 }
1692 return ret;
1693}
1694
1695
/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * bs is the BlockDriverState to reopen
 * flags are the new open flags
 * queue is the reopen queue
 *
 * Returns 0 on success, non-zero on error. On error errp will be set
 * as well.
 *
 * On failure, bdrv_reopen_abort() will be called to clean up any data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockDriver *drv;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* if we are to stay read-only, do not allow permission change
     * to r/w */
    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
        reopen_state->flags & BDRV_O_RDWR) {
        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
                  bdrv_get_device_name(reopen_state->bs));
        goto error;
    }


    /* Flush pending writes so the image is consistent before the driver
     * switches it to the new open mode. */
    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
                  strerror(-ret));
        goto error;
    }

    /* Delegate the actual staging to the driver; if the driver reports
     * failure without setting an error, synthesize a generic one so errp
     * is always populated on a non-zero return. */
    if (drv->bdrv_reopen_prepare) {
        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
                  drv->format_name, bdrv_get_device_name(reopen_state->bs),
                  "reopening of file");
        ret = -1;
        goto error;
    }

    ret = 0;

error:
    return ret;
}
1767
1768/*
1769 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1770 * makes them final by swapping the staging BlockDriverState contents into
1771 * the active BlockDriverState contents.
1772 */
1773void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1774{
1775 BlockDriver *drv;
1776
1777 assert(reopen_state != NULL);
1778 drv = reopen_state->bs->drv;
1779 assert(drv != NULL);
1780
1781 /* If there are any driver level actions to take */
1782 if (drv->bdrv_reopen_commit) {
1783 drv->bdrv_reopen_commit(reopen_state);
1784 }
1785
1786 /* set BDS specific flags now */
1787 reopen_state->bs->open_flags = reopen_state->flags;
1788 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1789 BDRV_O_CACHE_WB);
1790 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001791
Kevin Wolf3baca892014-07-16 17:48:16 +02001792 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001793}
1794
1795/*
1796 * Abort the reopen, and delete and free the staged changes in
1797 * reopen_state
1798 */
1799void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1800{
1801 BlockDriver *drv;
1802
1803 assert(reopen_state != NULL);
1804 drv = reopen_state->bs->drv;
1805 assert(drv != NULL);
1806
1807 if (drv->bdrv_reopen_abort) {
1808 drv->bdrv_reopen_abort(reopen_state);
1809 }
1810}
1811
1812
/*
 * Close a BlockDriverState: cancel its job, drain and flush outstanding
 * I/O, release the backing chain and the protocol layer, and reset the
 * image-specific fields. The BDS object itself stays allocated (freeing
 * is bdrv_delete()'s job).
 */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    /* A running block job holds references into bs; stop it first */
    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        if (bs->backing_hd) {
            /* Detach before unref so bs no longer points at a possibly
             * freed backing BDS */
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        /* Driver teardown, then clear all per-image state so the BDS can
         * be reused by a later bdrv_open() */
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->growable = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;

        /* Drop the protocol layer last */
        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    /* Tell the device model the medium went away */
    bdrv_dev_change_media_cb(bs, false);

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }

    /* Free the attach/detach AioContext notifiers registered on this BDS */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}
1867
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001868void bdrv_close_all(void)
1869{
1870 BlockDriverState *bs;
1871
Benoît Canetdc364f42014-01-23 21:31:32 +01001872 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001873 AioContext *aio_context = bdrv_get_aio_context(bs);
1874
1875 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001876 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001877 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001878 }
1879}
1880
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001881/* Check if any requests are in-flight (including throttled requests) */
1882static bool bdrv_requests_pending(BlockDriverState *bs)
1883{
1884 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1885 return true;
1886 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001887 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1888 return true;
1889 }
1890 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001891 return true;
1892 }
1893 if (bs->file && bdrv_requests_pending(bs->file)) {
1894 return true;
1895 }
1896 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1897 return true;
1898 }
1899 return false;
1900}
1901
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete. Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    /* Always run first iteration so any pending completion BHs run */
    bool busy = true;
    BlockDriverState *bs;

    /* Keep polling until a full pass over all BDSes sees no activity */
    while (busy) {
        busy = false;

        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
            AioContext *aio_context = bdrv_get_aio_context(bs);
            bool bs_busy;

            aio_context_acquire(aio_context);
            /* Push out queued I/O and release throttled requests so they
             * can complete, then poll the context once */
            bdrv_flush_io_queue(bs);
            bdrv_start_throttled_reqs(bs);
            bs_busy = bdrv_requests_pending(bs);
            /* aio_poll may itself make progress; blocking only if this BDS
             * still has requests pending */
            bs_busy |= aio_poll(aio_context, bs_busy);
            aio_context_release(aio_context);

            busy |= bs_busy;
        }
    }
}
1938
/* Make a BlockDriverState anonymous by removing it from the bdrv_states and
 * graph_bdrv_states lists. Also clear node_name so that a second call (or a
 * later list walk) cannot remove it twice. */
void bdrv_make_anon(BlockDriverState *bs)
{
    /*
     * Take care to remove bs from bdrv_states only when it's actually
     * in it. Note that bs->device_list.tqe_prev is initially null,
     * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
     * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
     * resetting it to null on remove.
     */
    if (bs->device_list.tqe_prev) {
        QTAILQ_REMOVE(&bdrv_states, bs, device_list);
        bs->device_list.tqe_prev = NULL;
    }
    /* A non-empty node_name means bs is on the graph node list */
    if (bs->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
    }
    bs->node_name[0] = '\0';
}
1960
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001961static void bdrv_rebind(BlockDriverState *bs)
1962{
1963 if (bs->drv && bs->drv->bdrv_rebind) {
1964 bs->drv->bdrv_rebind(bs);
1965 }
1966}
1967
/* Copy from bs_src to bs_dest the fields that must stay attached to the
 * device/BDS identity rather than follow the image contents; used by
 * bdrv_swap() to undo the effect of the whole-struct swap on these fields. */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->dev_ops = bs_src->dev_ops;
    bs_dest->dev_opaque = bs_src->dev_opaque;
    bs_dest->dev = bs_src->dev;
    bs_dest->guest_block_size = bs_src->guest_block_size;
    bs_dest->copy_on_read = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o throttled req */
    memcpy(&bs_dest->throttle_state,
           &bs_src->throttle_state,
           sizeof(ThrottleState));
    bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
    bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
    bs_dest->io_limits_enabled = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error = bs_src->on_read_error;
    bs_dest->on_write_error = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
    bs_dest->iostatus = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;

    /* reference count */
    bs_dest->refcnt = bs_src->refcnt;

    /* job */
    bs_dest->job = bs_src->job;

    /* keep the same entry in bdrv_states */
    bs_dest->device_list = bs_src->device_list;
    bs_dest->blk = bs_src->blk;

    /* op blockers are per-device state, not per-image */
    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
           sizeof(bs_dest->op_blockers));
}
2014
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be unattached and shouldn't have anything fancy enabled */
    assert(!bs_new->blk);
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->dev == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* Whole-struct swap; the feature fields are fixed up just below */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new must remain unattached */
    assert(!bs_new->blk);

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->dev == NULL);
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    /* Let the drivers fix up any internal pointers into the swapped BDSes */
    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
2079
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    bdrv_swap(bs_new, bs_top);

    /* After the swap, bs_top holds what used to be bs_new's contents and
     * vice versa; make the old top (now in bs_new) the backing file of
     * the new top. */
    bdrv_set_backing_hd(bs_top, bs_new);
}
2099
Fam Zheng4f6fd342013-08-23 09:14:47 +08002100static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00002101{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002102 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02002103 assert(!bs->job);
Fam Zheng3718d8a2014-05-23 21:29:43 +08002104 assert(bdrv_op_blocker_is_empty(bs));
Fam Zheng4f6fd342013-08-23 09:14:47 +08002105 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08002106 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02002107
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02002108 bdrv_close(bs);
2109
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002110 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05002111 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00002112
Anthony Liguori7267c092011-08-20 22:09:37 -05002113 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00002114}
2115
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002116int bdrv_attach_dev(BlockDriverState *bs, void *dev)
2117/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02002118{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002119 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02002120 return -EBUSY;
2121 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002122 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03002123 bdrv_iostatus_reset(bs);
Stefan Hajnoczi2a871512014-07-07 15:15:53 +02002124
2125 /* We're expecting I/O from the device so bump up coroutine pool size */
2126 qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
Markus Armbruster18846de2010-06-29 16:58:30 +02002127 return 0;
2128}
2129
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002130/* TODO qdevified devices don't use this, remove when devices are qdevified */
2131void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02002132{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002133 if (bdrv_attach_dev(bs, dev) < 0) {
2134 abort();
2135 }
2136}
2137
2138void bdrv_detach_dev(BlockDriverState *bs, void *dev)
2139/* TODO change to DeviceState *dev when all users are qdevified */
2140{
2141 assert(bs->dev == dev);
2142 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02002143 bs->dev_ops = NULL;
2144 bs->dev_opaque = NULL;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01002145 bs->guest_block_size = 512;
Stefan Hajnoczi2a871512014-07-07 15:15:53 +02002146 qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
Markus Armbruster18846de2010-06-29 16:58:30 +02002147}
2148
/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    /* Device model attached via bdrv_attach_dev(), or NULL if none */
    return bs->dev;
}
2154
/* Register the device model's callback table (media change, eject request,
 * tray state, resize, ...); opaque is passed back to every callback. */
void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
}
2161
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002162static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02002163{
Markus Armbruster145feb12011-08-03 15:07:42 +02002164 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002165 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002166 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002167 if (tray_was_closed) {
2168 /* tray open */
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02002169 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2170 true, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002171 }
2172 if (load) {
2173 /* tray close */
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02002174 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2175 false, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002176 }
Markus Armbruster145feb12011-08-03 15:07:42 +02002177 }
2178}
2179
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002180bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2181{
2182 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2183}
2184
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01002185void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2186{
2187 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2188 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2189 }
2190}
2191
Markus Armbrustere4def802011-09-06 18:58:53 +02002192bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2193{
2194 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2195 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2196 }
2197 return false;
2198}
2199
Markus Armbruster145feb12011-08-03 15:07:42 +02002200static void bdrv_dev_resize_cb(BlockDriverState *bs)
2201{
2202 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2203 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02002204 }
2205}
2206
Markus Armbrusterf1076392011-09-06 18:58:46 +02002207bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2208{
2209 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2210 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2211 }
2212 return false;
2213}
2214
aliguorie97fc192009-04-21 23:11:50 +00002215/*
2216 * Run consistency checks on an image
2217 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002218 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002219 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002220 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002221 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002222int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002223{
Max Reitz908bcd52014-08-07 22:47:55 +02002224 if (bs->drv == NULL) {
2225 return -ENOMEDIUM;
2226 }
aliguorie97fc192009-04-21 23:11:50 +00002227 if (bs->drv->bdrv_check == NULL) {
2228 return -ENOTSUP;
2229 }
2230
Kevin Wolfe076f332010-06-29 11:43:13 +02002231 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002232 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002233}
2234
Kevin Wolf8a426612010-07-16 17:17:01 +02002235#define COMMIT_BUF_SECTORS 2048
2236
bellard33e39632003-07-06 17:15:21 +00002237/* commit COW file into the raw image */
2238int bdrv_commit(BlockDriverState *bs)
2239{
bellard19cb3732006-08-19 11:45:59 +00002240 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002241 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002242 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002243 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002244 uint8_t *buf = NULL;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002245 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00002246
bellard19cb3732006-08-19 11:45:59 +00002247 if (!drv)
2248 return -ENOMEDIUM;
Liu Yuan6bb45152014-09-01 13:35:21 +08002249
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002250 if (!bs->backing_hd) {
2251 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002252 }
2253
Fam Zheng3718d8a2014-05-23 21:29:43 +08002254 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
2255 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002256 return -EBUSY;
2257 }
2258
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002259 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002260 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2261 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002262 open_flags = bs->backing_hd->open_flags;
2263
2264 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002265 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2266 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002267 }
bellard33e39632003-07-06 17:15:21 +00002268 }
bellardea2384d2004-08-01 21:59:26 +00002269
Jeff Cody72706ea2014-01-24 09:02:35 -05002270 length = bdrv_getlength(bs);
2271 if (length < 0) {
2272 ret = length;
2273 goto ro_cleanup;
2274 }
2275
2276 backing_length = bdrv_getlength(bs->backing_hd);
2277 if (backing_length < 0) {
2278 ret = backing_length;
2279 goto ro_cleanup;
2280 }
2281
2282 /* If our top snapshot is larger than the backing file image,
2283 * grow the backing file image if possible. If not possible,
2284 * we must return an error */
2285 if (length > backing_length) {
2286 ret = bdrv_truncate(bs->backing_hd, length);
2287 if (ret < 0) {
2288 goto ro_cleanup;
2289 }
2290 }
2291
2292 total_sectors = length >> BDRV_SECTOR_BITS;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002293
2294 /* qemu_try_blockalign() for bs will choose an alignment that works for
2295 * bs->backing_hd as well, so no need to compare the alignment manually. */
2296 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2297 if (buf == NULL) {
2298 ret = -ENOMEM;
2299 goto ro_cleanup;
2300 }
bellardea2384d2004-08-01 21:59:26 +00002301
Kevin Wolf8a426612010-07-16 17:17:01 +02002302 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002303 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2304 if (ret < 0) {
2305 goto ro_cleanup;
2306 }
2307 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002308 ret = bdrv_read(bs, sector, buf, n);
2309 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002310 goto ro_cleanup;
2311 }
2312
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002313 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2314 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002315 goto ro_cleanup;
2316 }
bellardea2384d2004-08-01 21:59:26 +00002317 }
2318 }
bellard95389c82005-12-18 18:28:15 +00002319
Christoph Hellwig1d449522010-01-17 12:32:30 +01002320 if (drv->bdrv_make_empty) {
2321 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002322 if (ret < 0) {
2323 goto ro_cleanup;
2324 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002325 bdrv_flush(bs);
2326 }
bellard95389c82005-12-18 18:28:15 +00002327
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002328 /*
2329 * Make sure all data we wrote to the backing device is actually
2330 * stable on disk.
2331 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002332 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002333 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002334 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002335
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002336 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002337ro_cleanup:
Kevin Wolf857d4f42014-05-20 13:16:51 +02002338 qemu_vfree(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002339
2340 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002341 /* ignoring error return here */
2342 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002343 }
2344
Christoph Hellwig1d449522010-01-17 12:32:30 +01002345 return ret;
bellard33e39632003-07-06 17:15:21 +00002346}
2347
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002348int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002349{
2350 BlockDriverState *bs;
2351
Benoît Canetdc364f42014-01-23 21:31:32 +01002352 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002353 AioContext *aio_context = bdrv_get_aio_context(bs);
2354
2355 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002356 if (bs->drv && bs->backing_hd) {
2357 int ret = bdrv_commit(bs);
2358 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002359 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002360 return ret;
2361 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002362 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002363 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002364 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002365 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002366}
2367
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002368/**
2369 * Remove an active request from the tracked requests list
2370 *
2371 * This function should be called when a tracked request is completing.
2372 */
2373static void tracked_request_end(BdrvTrackedRequest *req)
2374{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002375 if (req->serialising) {
2376 req->bs->serialising_in_flight--;
2377 }
2378
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002379 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002380 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002381}
2382
2383/**
2384 * Add an active request to the tracked requests list
2385 */
2386static void tracked_request_begin(BdrvTrackedRequest *req,
2387 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002388 int64_t offset,
2389 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002390{
2391 *req = (BdrvTrackedRequest){
2392 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002393 .offset = offset,
2394 .bytes = bytes,
2395 .is_write = is_write,
2396 .co = qemu_coroutine_self(),
2397 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002398 .overlap_offset = offset,
2399 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002400 };
2401
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002402 qemu_co_queue_init(&req->wait_queue);
2403
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002404 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2405}
2406
Kevin Wolfe96126f2014-02-08 10:42:18 +01002407static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002408{
Kevin Wolf73271452013-12-04 17:08:50 +01002409 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002410 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2411 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002412
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002413 if (!req->serialising) {
2414 req->bs->serialising_in_flight++;
2415 req->serialising = true;
2416 }
Kevin Wolf73271452013-12-04 17:08:50 +01002417
2418 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2419 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002420}
2421
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002422/**
2423 * Round a region to cluster boundaries
2424 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002425void bdrv_round_to_clusters(BlockDriverState *bs,
2426 int64_t sector_num, int nb_sectors,
2427 int64_t *cluster_sector_num,
2428 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002429{
2430 BlockDriverInfo bdi;
2431
2432 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2433 *cluster_sector_num = sector_num;
2434 *cluster_nb_sectors = nb_sectors;
2435 } else {
2436 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2437 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2438 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2439 nb_sectors, c);
2440 }
2441}
2442
Kevin Wolf73271452013-12-04 17:08:50 +01002443static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002444{
2445 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002446 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002447
Kevin Wolf73271452013-12-04 17:08:50 +01002448 ret = bdrv_get_info(bs, &bdi);
2449 if (ret < 0 || bdi.cluster_size == 0) {
2450 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002451 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002452 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002453 }
2454}
2455
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002456static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002457 int64_t offset, unsigned int bytes)
2458{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002459 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002460 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002461 return false;
2462 }
2463 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002464 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002465 return false;
2466 }
2467 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002468}
2469
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002470static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002471{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002472 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002473 BdrvTrackedRequest *req;
2474 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002475 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002476
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002477 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002478 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002479 }
2480
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002481 do {
2482 retry = false;
2483 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002484 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002485 continue;
2486 }
Kevin Wolf73271452013-12-04 17:08:50 +01002487 if (tracked_request_overlaps(req, self->overlap_offset,
2488 self->overlap_bytes))
2489 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002490 /* Hitting this means there was a reentrant request, for
2491 * example, a block driver issuing nested requests. This must
2492 * never happen since it means deadlock.
2493 */
2494 assert(qemu_coroutine_self() != req->co);
2495
Kevin Wolf64604402013-12-13 13:04:35 +01002496 /* If the request is already (indirectly) waiting for us, or
2497 * will wait for us as soon as it wakes up, then just go on
2498 * (instead of producing a deadlock in the former case). */
2499 if (!req->waiting_for) {
2500 self->waiting_for = req;
2501 qemu_co_queue_wait(&req->wait_queue);
2502 self->waiting_for = NULL;
2503 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002504 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002505 break;
2506 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002507 }
2508 }
2509 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002510
2511 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002512}
2513
Kevin Wolf756e6732010-01-12 12:55:17 +01002514/*
2515 * Return values:
2516 * 0 - success
2517 * -EINVAL - backing format specified, but no file
2518 * -ENOSPC - can't update the backing file because no space is left in the
2519 * image file header
2520 * -ENOTSUP - format driver doesn't support changing the backing file
2521 */
2522int bdrv_change_backing_file(BlockDriverState *bs,
2523 const char *backing_file, const char *backing_fmt)
2524{
2525 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002526 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002527
Paolo Bonzini5f377792012-04-12 14:01:01 +02002528 /* Backing file format doesn't make sense without a backing file */
2529 if (backing_fmt && !backing_file) {
2530 return -EINVAL;
2531 }
2532
Kevin Wolf756e6732010-01-12 12:55:17 +01002533 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002534 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002535 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002536 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002537 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002538
2539 if (ret == 0) {
2540 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2541 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2542 }
2543 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002544}
2545
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002546/*
2547 * Finds the image layer in the chain that has 'bs' as its backing file.
2548 *
2549 * active is the current topmost image.
2550 *
2551 * Returns NULL if bs is not found in active's image chain,
2552 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002553 *
2554 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002555 */
2556BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2557 BlockDriverState *bs)
2558{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002559 while (active && bs != active->backing_hd) {
2560 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002561 }
2562
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002563 return active;
2564}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002565
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002566/* Given a BDS, searches for the base layer. */
2567BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2568{
2569 return bdrv_find_overlay(bs, NULL);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002570}
2571
/* Bookkeeping node used by bdrv_drop_intermediate() to queue the chain
 * elements that are to be dropped. */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;                          /* image to be dropped */
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;   /* deletion-queue link */
} BlkIntermediateStates;
2576
2577
2578/*
2579 * Drops images above 'base' up to and including 'top', and sets the image
2580 * above 'top' to have base as its backing file.
2581 *
2582 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2583 * information in 'bs' can be properly updated.
2584 *
2585 * E.g., this will convert the following chain:
2586 * bottom <- base <- intermediate <- top <- active
2587 *
2588 * to
2589 *
2590 * bottom <- base <- active
2591 *
2592 * It is allowed for bottom==base, in which case it converts:
2593 *
2594 * base <- intermediate <- top <- active
2595 *
2596 * to
2597 *
2598 * base <- active
2599 *
Jeff Cody54e26902014-06-25 15:40:10 -04002600 * If backing_file_str is non-NULL, it will be used when modifying top's
2601 * overlay image metadata.
2602 *
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002603 * Error conditions:
2604 * if active == top, that is considered an error
2605 *
2606 */
2607int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
Jeff Cody54e26902014-06-25 15:40:10 -04002608 BlockDriverState *base, const char *backing_file_str)
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002609{
2610 BlockDriverState *intermediate;
2611 BlockDriverState *base_bs = NULL;
2612 BlockDriverState *new_top_bs = NULL;
2613 BlkIntermediateStates *intermediate_state, *next;
2614 int ret = -EIO;
2615
2616 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2617 QSIMPLEQ_INIT(&states_to_delete);
2618
2619 if (!top->drv || !base->drv) {
2620 goto exit;
2621 }
2622
2623 new_top_bs = bdrv_find_overlay(active, top);
2624
2625 if (new_top_bs == NULL) {
2626 /* we could not find the image above 'top', this is an error */
2627 goto exit;
2628 }
2629
2630 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2631 * to do, no intermediate images */
2632 if (new_top_bs->backing_hd == base) {
2633 ret = 0;
2634 goto exit;
2635 }
2636
2637 intermediate = top;
2638
2639 /* now we will go down through the list, and add each BDS we find
2640 * into our deletion queue, until we hit the 'base'
2641 */
2642 while (intermediate) {
Markus Armbruster5839e532014-08-19 10:31:08 +02002643 intermediate_state = g_new0(BlkIntermediateStates, 1);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002644 intermediate_state->bs = intermediate;
2645 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2646
2647 if (intermediate->backing_hd == base) {
2648 base_bs = intermediate->backing_hd;
2649 break;
2650 }
2651 intermediate = intermediate->backing_hd;
2652 }
2653 if (base_bs == NULL) {
2654 /* something went wrong, we did not end at the base. safely
2655 * unravel everything, and exit with error */
2656 goto exit;
2657 }
2658
2659 /* success - we can delete the intermediate states, and link top->base */
Jeff Cody54e26902014-06-25 15:40:10 -04002660 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2661 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002662 base_bs->drv ? base_bs->drv->format_name : "");
2663 if (ret) {
2664 goto exit;
2665 }
Fam Zheng920beae2014-05-23 21:29:46 +08002666 bdrv_set_backing_hd(new_top_bs, base_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002667
2668 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2669 /* so that bdrv_close() does not recursively close the chain */
Fam Zheng920beae2014-05-23 21:29:46 +08002670 bdrv_set_backing_hd(intermediate_state->bs, NULL);
Fam Zheng4f6fd342013-08-23 09:14:47 +08002671 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002672 }
2673 ret = 0;
2674
2675exit:
2676 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2677 g_free(intermediate_state);
2678 }
2679 return ret;
2680}
2681
2682
aliguori71d07702009-03-03 17:37:16 +00002683static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2684 size_t size)
2685{
2686 int64_t len;
2687
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002688 if (size > INT_MAX) {
2689 return -EIO;
2690 }
2691
aliguori71d07702009-03-03 17:37:16 +00002692 if (!bdrv_is_inserted(bs))
2693 return -ENOMEDIUM;
2694
2695 if (bs->growable)
2696 return 0;
2697
2698 len = bdrv_getlength(bs);
2699
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002700 if (offset < 0)
2701 return -EIO;
2702
2703 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00002704 return -EIO;
2705
2706 return 0;
2707}
2708
2709static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2710 int nb_sectors)
2711{
Kevin Wolf54db38a2014-04-14 14:47:14 +02002712 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002713 return -EIO;
2714 }
2715
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002716 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2717 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002718}
2719
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002720typedef struct RwCo {
2721 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002722 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002723 QEMUIOVector *qiov;
2724 bool is_write;
2725 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002726 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002727} RwCo;
2728
2729static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2730{
2731 RwCo *rwco = opaque;
2732
2733 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002734 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2735 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002736 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002737 } else {
2738 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2739 rwco->qiov->size, rwco->qiov,
2740 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002741 }
2742}
2743
2744/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002745 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002746 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002747static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2748 QEMUIOVector *qiov, bool is_write,
2749 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002750{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002751 Coroutine *co;
2752 RwCo rwco = {
2753 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002754 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002755 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002756 .is_write = is_write,
2757 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002758 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002759 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002760
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002761 /**
2762 * In sync call context, when the vcpu is blocked, this throttling timer
2763 * will not fire; so the I/O throttling function has to be disabled here
2764 * if it has been enabled.
2765 */
2766 if (bs->io_limits_enabled) {
2767 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2768 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2769 bdrv_io_limits_disable(bs);
2770 }
2771
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002772 if (qemu_in_coroutine()) {
2773 /* Fast-path if already in coroutine context */
2774 bdrv_rw_co_entry(&rwco);
2775 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002776 AioContext *aio_context = bdrv_get_aio_context(bs);
2777
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002778 co = qemu_coroutine_create(bdrv_rw_co_entry);
2779 qemu_coroutine_enter(co, &rwco);
2780 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002781 aio_poll(aio_context, true);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002782 }
2783 }
2784 return rwco.ret;
2785}
2786
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002787/*
2788 * Process a synchronous request using coroutines
2789 */
2790static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002791 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002792{
2793 QEMUIOVector qiov;
2794 struct iovec iov = {
2795 .iov_base = (void *)buf,
2796 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2797 };
2798
Kevin Wolfda15ee52014-04-14 15:39:36 +02002799 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2800 return -EINVAL;
2801 }
2802
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002803 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002804 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2805 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002806}
2807
bellard19cb3732006-08-19 11:45:59 +00002808/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002809int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002810 uint8_t *buf, int nb_sectors)
2811{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002812 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002813}
2814
Markus Armbruster07d27a42012-06-29 17:34:29 +02002815/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2816int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2817 uint8_t *buf, int nb_sectors)
2818{
2819 bool enabled;
2820 int ret;
2821
2822 enabled = bs->io_limits_enabled;
2823 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002824 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002825 bs->io_limits_enabled = enabled;
2826 return ret;
2827}
2828
ths5fafdf22007-09-16 21:08:06 +00002829/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002830 -EIO generic I/O error (may happen for all errors)
2831 -ENOMEDIUM No media inserted.
2832 -EINVAL Invalid sector number or nb_sectors
2833 -EACCES Trying to write a read-only device
2834*/
ths5fafdf22007-09-16 21:08:06 +00002835int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002836 const uint8_t *buf, int nb_sectors)
2837{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002838 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002839}
2840
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002841int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2842 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002843{
2844 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002845 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002846}
2847
Peter Lievend75cbb52013-10-24 12:07:03 +02002848/*
2849 * Completely zero out a block device with the help of bdrv_write_zeroes.
2850 * The operation is sped up by checking the block status and only writing
2851 * zeroes to the device if they currently do not return zeroes. Optional
2852 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2853 *
2854 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2855 */
2856int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2857{
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002858 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
Peter Lievend75cbb52013-10-24 12:07:03 +02002859 int n;
2860
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002861 target_sectors = bdrv_nb_sectors(bs);
2862 if (target_sectors < 0) {
2863 return target_sectors;
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002864 }
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002865
Peter Lievend75cbb52013-10-24 12:07:03 +02002866 for (;;) {
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002867 nb_sectors = target_sectors - sector_num;
Peter Lievend75cbb52013-10-24 12:07:03 +02002868 if (nb_sectors <= 0) {
2869 return 0;
2870 }
2871 if (nb_sectors > INT_MAX) {
2872 nb_sectors = INT_MAX;
2873 }
2874 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002875 if (ret < 0) {
2876 error_report("error getting block status at sector %" PRId64 ": %s",
2877 sector_num, strerror(-ret));
2878 return ret;
2879 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002880 if (ret & BDRV_BLOCK_ZERO) {
2881 sector_num += n;
2882 continue;
2883 }
2884 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2885 if (ret < 0) {
2886 error_report("error writing zeroes at sector %" PRId64 ": %s",
2887 sector_num, strerror(-ret));
2888 return ret;
2889 }
2890 sector_num += n;
2891 }
2892}
2893
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002894int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002895{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002896 QEMUIOVector qiov;
2897 struct iovec iov = {
2898 .iov_base = (void *)buf,
2899 .iov_len = bytes,
2900 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002901 int ret;
bellard83f64092006-08-01 16:21:11 +00002902
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002903 if (bytes < 0) {
2904 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002905 }
2906
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002907 qemu_iovec_init_external(&qiov, &iov, 1);
2908 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2909 if (ret < 0) {
2910 return ret;
bellard83f64092006-08-01 16:21:11 +00002911 }
2912
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002913 return bytes;
bellard83f64092006-08-01 16:21:11 +00002914}
2915
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002916int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002917{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002918 int ret;
bellard83f64092006-08-01 16:21:11 +00002919
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002920 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2921 if (ret < 0) {
2922 return ret;
bellard83f64092006-08-01 16:21:11 +00002923 }
2924
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002925 return qiov->size;
2926}
2927
2928int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002929 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002930{
2931 QEMUIOVector qiov;
2932 struct iovec iov = {
2933 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002934 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002935 };
2936
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002937 if (bytes < 0) {
2938 return -EINVAL;
2939 }
2940
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002941 qemu_iovec_init_external(&qiov, &iov, 1);
2942 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002943}
bellard83f64092006-08-01 16:21:11 +00002944
Kevin Wolff08145f2010-06-16 16:38:15 +02002945/*
2946 * Writes to the file and ensures that no writes are reordered across this
2947 * request (acts as a barrier)
2948 *
2949 * Returns 0 on success, -errno in error cases.
2950 */
2951int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2952 const void *buf, int count)
2953{
2954 int ret;
2955
2956 ret = bdrv_pwrite(bs, offset, buf, count);
2957 if (ret < 0) {
2958 return ret;
2959 }
2960
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002961 /* No flush needed for cache modes that already do it */
2962 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002963 bdrv_flush(bs);
2964 }
2965
2966 return 0;
2967}
2968
/* Copy-on-read implementation: read a whole cluster through a private
 * bounce buffer, write it back into the image so the data becomes local,
 * then copy the requested sub-range into the caller's qiov.
 * Returns 0 on success, -errno on failure. */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
                           &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    /* One bounce buffer sized for the whole (rounded-up) cluster range. */
    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
    if (bounce_buffer == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    /* Read the full cluster from the (possibly backing) image. */
    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    /* Write the data back locally; prefer the efficient zero-write path
     * when the driver supports it and the cluster is all zeroes. */
    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors, 0);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* Hand the caller only the sub-range it actually asked for. */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
                        nb_sectors * BDRV_SECTOR_SIZE);

err:
    /* qemu_vfree(NULL) is a no-op, so this is safe on the ENOMEM path. */
    qemu_vfree(bounce_buffer);
    return ret;
}
3039
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003040/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003041 * Forwards an already correctly aligned request to the BlockDriver. This
3042 * handles copy on read and zeroing after EOF; any other features must be
3043 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003044 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003045static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003046 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01003047 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02003048{
3049 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003050 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003051
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003052 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3053 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003054
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003055 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3056 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003057 assert(!qiov || bytes == qiov->size);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003058
3059 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003060 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01003061 /* If we touch the same cluster it counts as an overlap. This
3062 * guarantees that allocating writes will be serialized and not race
3063 * with each other for the same cluster. For example, in copy-on-read
3064 * it ensures that the CoR read and write operations are atomic and
3065 * guest writes cannot interleave between them. */
3066 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003067 }
3068
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01003069 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00003070
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003071 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00003072 int pnum;
3073
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003074 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003075 if (ret < 0) {
3076 goto out;
3077 }
3078
3079 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003080 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003081 goto out;
3082 }
3083 }
3084
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003085 /* Forward the request to the BlockDriver */
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003086 if (!(bs->zero_beyond_eof && bs->growable)) {
3087 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3088 } else {
3089 /* Read zeros after EOF of growable BDSes */
Markus Armbruster40490822014-06-26 13:23:19 +02003090 int64_t total_sectors, max_nb_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003091
Markus Armbruster40490822014-06-26 13:23:19 +02003092 total_sectors = bdrv_nb_sectors(bs);
3093 if (total_sectors < 0) {
3094 ret = total_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003095 goto out;
3096 }
3097
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01003098 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3099 align >> BDRV_SECTOR_BITS);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003100 if (max_nb_sectors > 0) {
Kevin Wolf33f461e2014-07-03 13:21:24 +02003101 QEMUIOVector local_qiov;
3102 size_t local_sectors;
3103
3104 max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
3105 local_sectors = MIN(max_nb_sectors, nb_sectors);
3106
3107 qemu_iovec_init(&local_qiov, qiov->niov);
3108 qemu_iovec_concat(&local_qiov, qiov, 0,
3109 local_sectors * BDRV_SECTOR_SIZE);
3110
3111 ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
3112 &local_qiov);
3113
3114 qemu_iovec_destroy(&local_qiov);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003115 } else {
3116 ret = 0;
3117 }
3118
3119 /* Reading beyond end of file is supposed to produce zeroes */
3120 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3121 uint64_t offset = MAX(0, total_sectors - sector_num);
3122 uint64_t bytes = (sector_num + nb_sectors - offset) *
3123 BDRV_SECTOR_SIZE;
3124 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3125 }
3126 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00003127
3128out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003129 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003130}
3131
/*
 * Handle a read request in coroutine context
 */
/* Byte-granularity read entry: applies medium/bounds checks, copy-on-read
 * flagging, I/O throttling, then pads the request to the device's required
 * alignment before forwarding to bdrv_aligned_preadv().
 * Returns 0 on success, -errno on failure. */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_byte_request(bs, offset, bytes)) {
        return -EIO;
    }

    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, false);
    }

    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        /* Unaligned head: prepend a scratch buffer covering the bytes
         * between the aligned-down offset and the requested offset. */
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* Unaligned tail: append a scratch buffer up to the next aligned
         * boundary. */
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    /* Track the (now aligned) request for request-serialisation purposes. */
    tracked_request_begin(&req, bs, offset, bytes, false);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}
3205
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003206static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3207 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3208 BdrvRequestFlags flags)
3209{
3210 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3211 return -EINVAL;
3212 }
3213
3214 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3215 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3216}
3217
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003218int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003219 int nb_sectors, QEMUIOVector *qiov)
3220{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003221 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003222
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003223 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3224}
3225
3226int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3227 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3228{
3229 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3230
3231 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3232 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003233}
3234
Peter Lievenc31cb702013-10-24 12:06:58 +02003235/* if no limit is specified in the BlockLimits use a default
3236 * of 32768 512-byte sectors (16 MiB) per request.
3237 */
3238#define MAX_WRITE_ZEROES_DEFAULT 32768
3239
/* Zero out a sector range, chunking the work to respect the driver's
 * alignment and maximum-request limits.  Uses the driver's efficient
 * write-zeroes callback when available and falls back to writing an
 * explicit zeroed bounce buffer otherwise.
 * Returns 0 on success, -errno on failure. */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov = {0};
    int ret = 0;

    int max_write_zeroes = bs->bl.max_write_zeroes ?
                           bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;

    while (nb_sectors > 0 && !ret) {
        int num = nb_sectors;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned.
         */
        if (bs->bl.write_zeroes_alignment
            && num > bs->bl.write_zeroes_alignment) {
            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
                /* Make a small request up to the first aligned sector. */
                num = bs->bl.write_zeroes_alignment;
                num -= sector_num % bs->bl.write_zeroes_alignment;
            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
                /* Shorten the request to the last aligned sector.  num cannot
                 * underflow because num > bs->bl.write_zeroes_alignment.
                 */
                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
            }
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_write_zeroes) {
            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            iov.iov_len = num * BDRV_SECTOR_SIZE;
            if (iov.iov_base == NULL) {
                /* Allocate (and zero) the bounce buffer lazily, on the
                 * first iteration that needs the fallback path. */
                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
                if (iov.iov_base == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);

            /* Keep bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_write_zeroes) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        sector_num += num;
        nb_sectors -= num;
    }

fail:
    /* qemu_vfree(NULL) is a no-op, so this is safe whether or not the
     * fallback buffer was ever allocated. */
    qemu_vfree(iov.iov_base);
    return ret;
}
3314
/*
 * Forwards an already correctly aligned write request to the BlockDriver.
 */
/* @offset and @bytes must be sector-aligned; @qiov (if non-NULL) must be
 * exactly @bytes long, and @req must already cover the request range.
 * Returns 0 on success, -errno on failure. */
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* A serialising request must not have had to wait here: the caller is
     * expected to have serialised (and waited) before marking the request
     * serialising. */
    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);

    /* Give write notifiers a chance to veto or observe the request. */
    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

    /* Zero-detection: convert an all-zero payload into a zero write when
     * the driver supports it and detect-zeroes is enabled. */
    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
        qemu_iovec_is_zero(qiov)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
    } else {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }
    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);

    /* Without a write cache, every successful write is flushed through
     * (writethrough semantics). */
    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
    }

    bdrv_set_dirty(bs, sector_num, nb_sectors);

    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);

    /* Growable images may have been extended by this write. */
    if (bs->growable && ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
    }

    return ret;
}
3374
/*
 * Handle a write request in coroutine context
 */
/* Byte-granularity write entry: applies medium/read-only/bounds checks and
 * throttling, then aligns the request via a read-modify-write cycle (reading
 * the unaligned head/tail into scratch buffers) before forwarding to
 * bdrv_aligned_pwritev().  Returns 0 on success, -errno on failure. */
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BdrvTrackedRequest req;
    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_byte_request(bs, offset, bytes)) {
        return -EIO;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, true);
    }

    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, true);

    if (offset & (align - 1)) {
        /* Unaligned head: read the full aligned block containing the start
         * of the request and prepend its leading bytes to the qiov. */
        QEMUIOVector head_qiov;
        struct iovec head_iov;

        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base   = head_buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* Unaligned tail: read the full aligned block containing the end
         * of the request and append its trailing bytes to the qiov. */
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        /* If we had to wait here, the head handling above must already have
         * serialised and waited (use_local_qiov set). */
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base   = tail_buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                  align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
    }

    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

fail:
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    /* head_buf/tail_buf are NULL unless the corresponding padding was
     * allocated; qemu_vfree(NULL) is a no-op. */
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);

    return ret;
}
3496
Kevin Wolf66015532013-12-03 14:40:18 +01003497static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3498 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3499 BdrvRequestFlags flags)
3500{
3501 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3502 return -EINVAL;
3503 }
3504
3505 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3506 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3507}
3508
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003509int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3510 int nb_sectors, QEMUIOVector *qiov)
3511{
3512 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3513
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003514 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3515}
3516
3517int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003518 int64_t sector_num, int nb_sectors,
3519 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003520{
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003521 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003522
Peter Lievend32f35c2013-10-24 12:06:52 +02003523 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3524 flags &= ~BDRV_REQ_MAY_UNMAP;
3525 }
3526
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003527 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003528 BDRV_REQ_ZERO_WRITE | flags);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003529}
3530
bellard83f64092006-08-01 16:21:11 +00003531/**
bellard83f64092006-08-01 16:21:11 +00003532 * Truncate file to 'offset' bytes (needed only for file protocols)
3533 */
3534int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3535{
3536 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003537 int ret;
bellard83f64092006-08-01 16:21:11 +00003538 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003539 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003540 if (!drv->bdrv_truncate)
3541 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003542 if (bs->read_only)
3543 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003544
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003545 ret = drv->bdrv_truncate(bs, offset);
3546 if (ret == 0) {
3547 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02003548 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003549 }
3550 return ret;
bellard83f64092006-08-01 16:21:11 +00003551}
3552
3553/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003554 * Length of a allocated file in bytes. Sparse files are counted by actual
3555 * allocated space. Return < 0 if error or unknown.
3556 */
3557int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3558{
3559 BlockDriver *drv = bs->drv;
3560 if (!drv) {
3561 return -ENOMEDIUM;
3562 }
3563 if (drv->bdrv_get_allocated_file_size) {
3564 return drv->bdrv_get_allocated_file_size(bs);
3565 }
3566 if (bs->file) {
3567 return bdrv_get_allocated_file_size(bs->file);
3568 }
3569 return -ENOTSUP;
3570}
3571
3572/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003573 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003574 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003575int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003576{
3577 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003578
bellard83f64092006-08-01 16:21:11 +00003579 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003580 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003581
Kevin Wolfb94a2612013-10-29 12:18:58 +01003582 if (drv->has_variable_length) {
3583 int ret = refresh_total_sectors(bs, bs->total_sectors);
3584 if (ret < 0) {
3585 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003586 }
bellard83f64092006-08-01 16:21:11 +00003587 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003588 return bs->total_sectors;
3589}
3590
3591/**
3592 * Return length in bytes on success, -errno on error.
3593 * The length is always a multiple of BDRV_SECTOR_SIZE.
3594 */
3595int64_t bdrv_getlength(BlockDriverState *bs)
3596{
3597 int64_t ret = bdrv_nb_sectors(bs);
3598
3599 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003600}
3601
bellard19cb3732006-08-19 11:45:59 +00003602/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003603void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003604{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003605 int64_t nb_sectors = bdrv_nb_sectors(bs);
3606
3607 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003608}
bellardcf989512004-02-16 21:56:36 +00003609
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003610void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3611 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003612{
3613 bs->on_read_error = on_read_error;
3614 bs->on_write_error = on_write_error;
3615}
3616
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003617BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003618{
3619 return is_read ? bs->on_read_error : bs->on_write_error;
3620}
3621
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003622BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3623{
3624 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3625
3626 switch (on_err) {
3627 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003628 return (error == ENOSPC) ?
3629 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003630 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003631 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003632 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003633 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003634 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003635 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003636 default:
3637 abort();
3638 }
3639}
3640
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003641static void send_qmp_error_event(BlockDriverState *bs,
3642 BlockErrorAction action,
3643 bool is_read, int error)
3644{
3645 BlockErrorAction ac;
3646
3647 ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3648 qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action,
3649 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003650 error == ENOSPC, strerror(error),
3651 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003652}
3653
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003654/* This is done by device models because, while the block layer knows
3655 * about the error, it does not know whether an operation comes from
3656 * the device or the block layer (from a job, for example).
3657 */
3658void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3659 bool is_read, int error)
3660{
3661 assert(error >= 0);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003662
Wenchao Xiaa5895692014-06-18 08:43:30 +02003663 if (action == BLOCK_ERROR_ACTION_STOP) {
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003664 /* First set the iostatus, so that "info block" returns an iostatus
3665 * that matches the events raised so far (an additional error iostatus
3666 * is fine, but not a lost one).
3667 */
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003668 bdrv_iostatus_set_err(bs, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003669
3670 /* Then raise the request to stop the VM and the event.
3671 * qemu_system_vmstop_request_prepare has two effects. First,
3672 * it ensures that the STOP event always comes after the
3673 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3674 * can observe the STOP event and do a "cont" before the STOP
3675 * event is issued, the VM will not stop. In this case, vm_start()
3676 * also ensures that the STOP/RESUME pair of events is emitted.
3677 */
3678 qemu_system_vmstop_request_prepare();
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003679 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003680 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
3681 } else {
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003682 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003683 }
3684}
3685
bellardb3380822004-03-14 21:38:54 +00003686int bdrv_is_read_only(BlockDriverState *bs)
3687{
3688 return bs->read_only;
3689}
3690
ths985a03b2007-12-24 16:10:43 +00003691int bdrv_is_sg(BlockDriverState *bs)
3692{
3693 return bs->sg;
3694}
3695
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003696int bdrv_enable_write_cache(BlockDriverState *bs)
3697{
3698 return bs->enable_write_cache;
3699}
3700
Paolo Bonzini425b0142012-06-06 00:04:52 +02003701void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3702{
3703 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003704
3705 /* so a reopen() will preserve wce */
3706 if (wce) {
3707 bs->open_flags |= BDRV_O_CACHE_WB;
3708 } else {
3709 bs->open_flags &= ~BDRV_O_CACHE_WB;
3710 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003711}
3712
bellardea2384d2004-08-01 21:59:26 +00003713int bdrv_is_encrypted(BlockDriverState *bs)
3714{
3715 if (bs->backing_hd && bs->backing_hd->encrypted)
3716 return 1;
3717 return bs->encrypted;
3718}
3719
aliguoric0f4ce72009-03-05 23:01:01 +00003720int bdrv_key_required(BlockDriverState *bs)
3721{
3722 BlockDriverState *backing_hd = bs->backing_hd;
3723
3724 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3725 return 1;
3726 return (bs->encrypted && !bs->valid_key);
3727}
3728
bellardea2384d2004-08-01 21:59:26 +00003729int bdrv_set_key(BlockDriverState *bs, const char *key)
3730{
3731 int ret;
3732 if (bs->backing_hd && bs->backing_hd->encrypted) {
3733 ret = bdrv_set_key(bs->backing_hd, key);
3734 if (ret < 0)
3735 return ret;
3736 if (!bs->encrypted)
3737 return 0;
3738 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003739 if (!bs->encrypted) {
3740 return -EINVAL;
3741 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3742 return -ENOMEDIUM;
3743 }
aliguoric0f4ce72009-03-05 23:01:01 +00003744 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003745 if (ret < 0) {
3746 bs->valid_key = 0;
3747 } else if (!bs->valid_key) {
3748 bs->valid_key = 1;
3749 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02003750 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00003751 }
aliguoric0f4ce72009-03-05 23:01:01 +00003752 return ret;
bellardea2384d2004-08-01 21:59:26 +00003753}
3754
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003755const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003756{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003757 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003758}
3759
/* qsort() comparator for an array of format-name strings.
 *
 * Fix: qsort() passes pointers to the array ELEMENTS, i.e. pointers to the
 * "const char *" entries.  The previous code called strcmp() on those
 * element addresses directly, comparing the pointer bytes instead of the
 * strings and producing an arbitrary, address-dependent sort order.  The
 * element pointers must be dereferenced first.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(char *const *)a, *(char *const *)b);
}
3764
ths5fafdf22007-09-16 21:08:06 +00003765void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003766 void *opaque)
3767{
3768 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003769 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003770 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003771 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003772
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003773 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003774 if (drv->format_name) {
3775 bool found = false;
3776 int i = count;
3777 while (formats && i && !found) {
3778 found = !strcmp(formats[--i], drv->format_name);
3779 }
3780
3781 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003782 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003783 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003784 }
3785 }
bellardea2384d2004-08-01 21:59:26 +00003786 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003787
3788 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3789
3790 for (i = 0; i < count; i++) {
3791 it(opaque, formats[i]);
3792 }
3793
Jeff Codye855e4f2014-04-28 18:29:54 -04003794 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003795}
3796
Benoît Canetdc364f42014-01-23 21:31:32 +01003797/* This function is to find block backend bs */
Markus Armbruster7f06d472014-10-07 13:59:12 +02003798/* TODO convert callers to blk_by_name(), then remove */
bellardb3380822004-03-14 21:38:54 +00003799BlockDriverState *bdrv_find(const char *name)
3800{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003801 BlockBackend *blk = blk_by_name(name);
bellardb3380822004-03-14 21:38:54 +00003802
Markus Armbruster7f06d472014-10-07 13:59:12 +02003803 return blk ? blk_bs(blk) : NULL;
bellardb3380822004-03-14 21:38:54 +00003804}
3805
Benoît Canetdc364f42014-01-23 21:31:32 +01003806/* This function is to find a node in the bs graph */
3807BlockDriverState *bdrv_find_node(const char *node_name)
3808{
3809 BlockDriverState *bs;
3810
3811 assert(node_name);
3812
3813 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3814 if (!strcmp(node_name, bs->node_name)) {
3815 return bs;
3816 }
3817 }
3818 return NULL;
3819}
3820
Benoît Canetc13163f2014-01-23 21:31:34 +01003821/* Put this QMP function here so it can access the static graph_bdrv_states. */
3822BlockDeviceInfoList *bdrv_named_nodes_list(void)
3823{
3824 BlockDeviceInfoList *list, *entry;
3825 BlockDriverState *bs;
3826
3827 list = NULL;
3828 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3829 entry = g_malloc0(sizeof(*entry));
3830 entry->value = bdrv_block_device_info(bs);
3831 entry->next = list;
3832 list = entry;
3833 }
3834
3835 return list;
3836}
3837
Benoît Canet12d3ba82014-01-23 21:31:35 +01003838BlockDriverState *bdrv_lookup_bs(const char *device,
3839 const char *node_name,
3840 Error **errp)
3841{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003842 BlockBackend *blk;
3843 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003844
Benoît Canet12d3ba82014-01-23 21:31:35 +01003845 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02003846 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003847
Markus Armbruster7f06d472014-10-07 13:59:12 +02003848 if (blk) {
3849 return blk_bs(blk);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003850 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003851 }
3852
Benoît Canetdd67fa52014-02-12 17:15:06 +01003853 if (node_name) {
3854 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003855
Benoît Canetdd67fa52014-02-12 17:15:06 +01003856 if (bs) {
3857 return bs;
3858 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003859 }
3860
Benoît Canetdd67fa52014-02-12 17:15:06 +01003861 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3862 device ? device : "",
3863 node_name ? node_name : "");
3864 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003865}
3866
Jeff Cody5a6684d2014-06-25 15:40:09 -04003867/* If 'base' is in the same chain as 'top', return true. Otherwise,
3868 * return false. If either argument is NULL, return false. */
3869bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3870{
3871 while (top && top != base) {
3872 top = top->backing_hd;
3873 }
3874
3875 return top != NULL;
3876}
3877
Markus Armbruster2f399b02010-06-02 18:55:20 +02003878BlockDriverState *bdrv_next(BlockDriverState *bs)
3879{
3880 if (!bs) {
3881 return QTAILQ_FIRST(&bdrv_states);
3882 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003883 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003884}
3885
Markus Armbruster7f06d472014-10-07 13:59:12 +02003886/* TODO check what callers really want: bs->node_name or blk_name() */
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003887const char *bdrv_get_device_name(const BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003888{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003889 return bs->blk ? blk_name(bs->blk) : "";
bellardea2384d2004-08-01 21:59:26 +00003890}
3891
Markus Armbrusterc8433282012-06-05 16:49:24 +02003892int bdrv_get_flags(BlockDriverState *bs)
3893{
3894 return bs->open_flags;
3895}
3896
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003897int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003898{
3899 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003900 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003901
Benoît Canetdc364f42014-01-23 21:31:32 +01003902 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003903 AioContext *aio_context = bdrv_get_aio_context(bs);
3904 int ret;
3905
3906 aio_context_acquire(aio_context);
3907 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003908 if (ret < 0 && !result) {
3909 result = ret;
3910 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003911 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003912 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003913
3914 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003915}
3916
/* Helper usable as a driver's bdrv_has_zero_init callback for formats whose
 * freshly created images always read back as zeroes.  @bs is unused. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
3921
Kevin Wolff2feebb2010-04-14 17:30:35 +02003922int bdrv_has_zero_init(BlockDriverState *bs)
3923{
3924 assert(bs->drv);
3925
Paolo Bonzini11212d82013-09-04 19:00:27 +02003926 /* If BS is a copy on write image, it is initialized to
3927 the contents of the base image, which may not be zeroes. */
3928 if (bs->backing_hd) {
3929 return 0;
3930 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003931 if (bs->drv->bdrv_has_zero_init) {
3932 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003933 }
3934
Peter Lieven3ac21622013-06-28 12:47:42 +02003935 /* safe default */
3936 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02003937}
3938
Peter Lieven4ce78692013-10-24 12:06:54 +02003939bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3940{
3941 BlockDriverInfo bdi;
3942
3943 if (bs->backing_hd) {
3944 return false;
3945 }
3946
3947 if (bdrv_get_info(bs, &bdi) == 0) {
3948 return bdi.unallocated_blocks_are_zero;
3949 }
3950
3951 return false;
3952}
3953
3954bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3955{
3956 BlockDriverInfo bdi;
3957
3958 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3959 return false;
3960 }
3961
3962 if (bdrv_get_info(bs, &bdi) == 0) {
3963 return bdi.can_write_zeroes_with_unmap;
3964 }
3965
3966 return false;
3967}
3968
/* Argument/result bundle passed through the bdrv_get_block_status()
 * coroutine wrapper (bdrv_get_block_status_co_entry). */
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;     /* node to query */
    BlockDriverState *base;   /* not used by the wrapper visible here —
                               * presumably for chain queries; TODO confirm */
    int64_t sector_num;       /* first sector of the query */
    int nb_sectors;           /* max sectors to examine */
    int *pnum;                /* out: sectors in the same state */
    int64_t ret;              /* out: BDRV_BLOCK_* status or -errno */
    bool done;                /* set when the coroutine has finished */
} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003978
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 *
 * The return value is a BDRV_BLOCK_* bitmask (or a negative errno).
 */
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
                                                     int64_t sector_num,
                                                     int nb_sectors, int *pnum)
{
    int64_t total_sectors;
    int64_t n;
    int64_t ret, ret2;

    total_sectors = bdrv_nb_sectors(bs);
    if (total_sectors < 0) {
        /* size query failed; propagate the error */
        return total_sectors;
    }

    if (sector_num >= total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* clamp the request to the end of the image */
    n = total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    /* Drivers without the callback are assumed fully allocated; protocol
     * drivers additionally report a valid host offset. */
    if (!bs->drv->bdrv_co_get_block_status) {
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
        }
        return ret;
    }

    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret < 0) {
        *pnum = 0;
        return ret;
    }

    /* RAW means "look at bs->file at the returned offset instead" */
    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                     *pnum, pnum);
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    }

    /* Unallocated in this layer: it may still be known-zero, either because
     * the format guarantees it or because it lies past the end of a
     * (shorter) backing file. */
    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing_hd) {
            BlockDriverState *bs2 = bs->backing_hd;
            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    /* Recurse into bs->file to pick up a ZERO flag for mapped data. */
    if (bs->file &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                        *pnum, pnum);
        if (ret2 >= 0) {
            /* Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
            ret |= (ret2 & BDRV_BLOCK_ZERO);
        }
    }

    return ret;
}
4069
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004070/* Coroutine wrapper for bdrv_get_block_status() */
4071static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004072{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004073 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004074 BlockDriverState *bs = data->bs;
4075
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004076 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4077 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004078 data->done = true;
4079}
4080
4081/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004082 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004083 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004084 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004085 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004086int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4087 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004088{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004089 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004090 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004091 .bs = bs,
4092 .sector_num = sector_num,
4093 .nb_sectors = nb_sectors,
4094 .pnum = pnum,
4095 .done = false,
4096 };
4097
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004098 if (qemu_in_coroutine()) {
4099 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004100 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004101 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004102 AioContext *aio_context = bdrv_get_aio_context(bs);
4103
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004104 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004105 qemu_coroutine_enter(co, &data);
4106 while (!data.done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004107 aio_poll(aio_context, true);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004108 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004109 }
4110 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00004111}
4112
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004113int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4114 int nb_sectors, int *pnum)
4115{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004116 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4117 if (ret < 0) {
4118 return ret;
4119 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004120 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004121}
4122
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (inclusive).  BASE can be NULL to check if the given
 * sector is allocated in any image of the chain.  Return false otherwise,
 * or a negative errno on failure.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t sector_num,
                            int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    int ret, n = nb_sectors;

    /* walk from TOP towards BASE, stopping at the first allocated layer */
    intermediate = top;
    while (intermediate && intermediate != base) {
        int pnum_inter;
        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
                                &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            *pnum = pnum_inter;
            return 1;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nr_sectors] allocated.
         */
        /* Shrink the reported run so it never extends past a region that a
         * deeper layer might describe differently; the end-of-file clamp is
         * skipped for TOP itself. */
        if (n > pnum_inter &&
            (intermediate == top ||
             sector_num + pnum_inter < intermediate->total_sectors)) {
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    *pnum = n;
    return 0;
}
4173
aliguori045df332009-03-05 23:00:48 +00004174const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4175{
4176 if (bs->backing_hd && bs->backing_hd->encrypted)
4177 return bs->backing_file;
4178 else if (bs->encrypted)
4179 return bs->filename;
4180 else
4181 return NULL;
4182}
4183
/* Copy the backing file name of @bs into @filename, truncating to
 * @filename_size (result is always NUL-terminated by pstrcpy). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
4189
ths5fafdf22007-09-16 21:08:06 +00004190int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00004191 const uint8_t *buf, int nb_sectors)
4192{
4193 BlockDriver *drv = bs->drv;
4194 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004195 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004196 if (!drv->bdrv_write_compressed)
4197 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02004198 if (bdrv_check_request(bs, sector_num, nb_sectors))
4199 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004200
Fam Zhenge4654d22013-11-13 18:29:43 +08004201 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004202
bellardfaea38e2006-08-05 21:31:00 +00004203 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4204}
ths3b46e622007-09-17 08:09:54 +00004205
bellardfaea38e2006-08-05 21:31:00 +00004206int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4207{
4208 BlockDriver *drv = bs->drv;
4209 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004210 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004211 if (!drv->bdrv_get_info)
4212 return -ENOTSUP;
4213 memset(bdi, 0, sizeof(*bdi));
4214 return drv->bdrv_get_info(bs, bdi);
4215}
4216
Max Reitzeae041f2013-10-09 10:46:16 +02004217ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4218{
4219 BlockDriver *drv = bs->drv;
4220 if (drv && drv->bdrv_get_specific_info) {
4221 return drv->bdrv_get_specific_info(bs);
4222 }
4223 return NULL;
4224}
4225
Christoph Hellwig45566e92009-07-10 23:11:57 +02004226int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4227 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004228{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004229 QEMUIOVector qiov;
4230 struct iovec iov = {
4231 .iov_base = (void *) buf,
4232 .iov_len = size,
4233 };
4234
4235 qemu_iovec_init_external(&qiov, &iov, 1);
4236 return bdrv_writev_vmstate(bs, &qiov, pos);
4237}
4238
4239int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4240{
aliguori178e08a2009-04-05 19:10:55 +00004241 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004242
4243 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004244 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004245 } else if (drv->bdrv_save_vmstate) {
4246 return drv->bdrv_save_vmstate(bs, qiov, pos);
4247 } else if (bs->file) {
4248 return bdrv_writev_vmstate(bs->file, qiov, pos);
4249 }
4250
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004251 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004252}
4253
Christoph Hellwig45566e92009-07-10 23:11:57 +02004254int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4255 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004256{
4257 BlockDriver *drv = bs->drv;
4258 if (!drv)
4259 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004260 if (drv->bdrv_load_vmstate)
4261 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4262 if (bs->file)
4263 return bdrv_load_vmstate(bs->file, buf, pos, size);
4264 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004265}
4266
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004267void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4268{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004269 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004270 return;
4271 }
4272
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004273 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004274}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004275
Kevin Wolf41c695c2012-12-06 14:32:58 +01004276int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4277 const char *tag)
4278{
4279 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4280 bs = bs->file;
4281 }
4282
4283 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4284 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4285 }
4286
4287 return -ENOTSUP;
4288}
4289
Fam Zheng4cc70e92013-11-20 10:01:54 +08004290int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4291{
4292 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4293 bs = bs->file;
4294 }
4295
4296 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4297 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4298 }
4299
4300 return -ENOTSUP;
4301}
4302
Kevin Wolf41c695c2012-12-06 14:32:58 +01004303int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4304{
Max Reitz938789e2014-03-10 23:44:08 +01004305 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004306 bs = bs->file;
4307 }
4308
4309 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4310 return bs->drv->bdrv_debug_resume(bs, tag);
4311 }
4312
4313 return -ENOTSUP;
4314}
4315
4316bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4317{
4318 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4319 bs = bs->file;
4320 }
4321
4322 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4323 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4324 }
4325
4326 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004327}
4328
Blue Swirl199630b2010-07-25 20:49:34 +00004329int bdrv_is_snapshot(BlockDriverState *bs)
4330{
4331 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4332}
4333
/* backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain.
 *
 * Returns the node in bs's backing chain whose backing file matches
 * @backing_file, or NULL if there is no match. */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp      = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {
                /* unresolvable path: skip this layer rather than fail */
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    return retval;
}
4399
Benoît Canetf198fd12012-08-02 10:22:47 +02004400int bdrv_get_backing_file_depth(BlockDriverState *bs)
4401{
4402 if (!bs->drv) {
4403 return 0;
4404 }
4405
4406 if (!bs->backing_hd) {
4407 return 0;
4408 }
4409
4410 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4411}
4412
/**************************************************************/
/* async I/Os */

/* Submit an asynchronous vectored read of @nb_sectors starting at
 * @sector_num; @cb is invoked with @opaque when the request completes. */
BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                           QEMUIOVector *qiov, int nb_sectors,
                           BlockCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    /* flags = 0, is_write = false */
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
                                 cb, opaque, false);
}
4425
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004426BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4427 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004428 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004429{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004430 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4431
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004432 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004433 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004434}
4435
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004436BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004437 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004438 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004439{
4440 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4441
4442 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4443 BDRV_REQ_ZERO_WRITE | flags,
4444 cb, opaque, true);
4445}
4446
Kevin Wolf40b4f532009-09-09 17:53:37 +02004447
/* Shared completion state for a batch of writes submitted through
 * bdrv_aio_multiwrite().  One MultiwriteCB tracks all merged requests;
 * the original per-request callbacks fire only once every merged
 * request has completed. */
typedef struct MultiwriteCB {
    int error;          /* first error seen by any request, 0 if none */
    int num_requests;   /* merged requests still in flight */
    int num_callbacks;  /* number of original (pre-merge) requests */
    struct {
        BlockCompletionFunc *cb;    /* caller's completion callback */
        void *opaque;               /* caller's opaque argument */
        QEMUIOVector *free_qiov;    /* merged qiov to destroy/free, or NULL */
    } callbacks[];
} MultiwriteCB;
4458
4459static void multiwrite_user_cb(MultiwriteCB *mcb)
4460{
4461 int i;
4462
4463 for (i = 0; i < mcb->num_callbacks; i++) {
4464 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004465 if (mcb->callbacks[i].free_qiov) {
4466 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4467 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004468 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004469 }
4470}
4471
4472static void multiwrite_cb(void *opaque, int ret)
4473{
4474 MultiwriteCB *mcb = opaque;
4475
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004476 trace_multiwrite_cb(mcb, ret);
4477
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004478 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004479 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004480 }
4481
4482 mcb->num_requests--;
4483 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004484 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004485 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004486 }
4487}
4488
4489static int multiwrite_req_compare(const void *a, const void *b)
4490{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004491 const BlockRequest *req1 = a, *req2 = b;
4492
4493 /*
4494 * Note that we can't simply subtract req2->sector from req1->sector
4495 * here as that could overflow the return value.
4496 */
4497 if (req1->sector > req2->sector) {
4498 return 1;
4499 } else if (req1->sector < req2->sector) {
4500 return -1;
4501 } else {
4502 return 0;
4503 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004504}
4505
4506/*
4507 * Takes a bunch of requests and tries to merge them. Returns the number of
4508 * requests that remain after merging.
4509 */
4510static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4511 int num_reqs, MultiwriteCB *mcb)
4512{
4513 int i, outidx;
4514
4515 // Sort requests by start sector
4516 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4517
4518 // Check if adjacent requests touch the same clusters. If so, combine them,
4519 // filling up gaps with zero sectors.
4520 outidx = 0;
4521 for (i = 1; i < num_reqs; i++) {
4522 int merge = 0;
4523 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4524
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004525 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004526 if (reqs[i].sector <= oldreq_last) {
4527 merge = 1;
4528 }
4529
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004530 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4531 merge = 0;
4532 }
4533
Kevin Wolf40b4f532009-09-09 17:53:37 +02004534 if (merge) {
4535 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004536 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004537 qemu_iovec_init(qiov,
4538 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4539
4540 // Add the first request to the merged one. If the requests are
4541 // overlapping, drop the last sectors of the first request.
4542 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004543 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004544
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004545 // We should need to add any zeros between the two requests
4546 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004547
4548 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004549 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004550
Stefan Hajnoczi391827e2014-07-30 09:53:30 +01004551 // Add tail of first request, if necessary
4552 if (qiov->size < reqs[outidx].qiov->size) {
4553 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4554 reqs[outidx].qiov->size - qiov->size);
4555 }
4556
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004557 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004558 reqs[outidx].qiov = qiov;
4559
4560 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4561 } else {
4562 outidx++;
4563 reqs[outidx].sector = reqs[i].sector;
4564 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4565 reqs[outidx].qiov = reqs[i].qiov;
4566 }
4567 }
4568
4569 return outidx + 1;
4570}
4571
4572/*
4573 * Submit multiple AIO write requests at once.
4574 *
4575 * On success, the function returns 0 and all requests in the reqs array have
4576 * been submitted. In error case this function returns -1, and any of the
4577 * requests may or may not be submitted yet. In particular, this means that the
4578 * callback will be called for some of the requests, for others it won't. The
4579 * caller must check the error field of the BlockRequest to wait for the right
4580 * callbacks (if error != 0, no callback will be called).
4581 *
4582 * The implementation may modify the contents of the reqs array, e.g. to merge
4583 * requests. However, the fields opaque and error are left unmodified as they
4584 * are used to signal failure for a single request to the caller.
4585 */
4586int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4587{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004588 MultiwriteCB *mcb;
4589 int i;
4590
Ryan Harper301db7c2011-03-07 10:01:04 -06004591 /* don't submit writes if we don't have a medium */
4592 if (bs->drv == NULL) {
4593 for (i = 0; i < num_reqs; i++) {
4594 reqs[i].error = -ENOMEDIUM;
4595 }
4596 return -1;
4597 }
4598
Kevin Wolf40b4f532009-09-09 17:53:37 +02004599 if (num_reqs == 0) {
4600 return 0;
4601 }
4602
4603 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004604 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004605 mcb->num_requests = 0;
4606 mcb->num_callbacks = num_reqs;
4607
4608 for (i = 0; i < num_reqs; i++) {
4609 mcb->callbacks[i].cb = reqs[i].cb;
4610 mcb->callbacks[i].opaque = reqs[i].opaque;
4611 }
4612
4613 // Check for mergable requests
4614 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4615
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004616 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4617
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004618 /* Run the aio requests. */
4619 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004620 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004621 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4622 reqs[i].nb_sectors, reqs[i].flags,
4623 multiwrite_cb, mcb,
4624 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004625 }
4626
4627 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004628}
4629
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004630void bdrv_aio_cancel(BlockAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004631{
Fam Zhengca5fd112014-09-11 13:41:27 +08004632 qemu_aio_ref(acb);
4633 bdrv_aio_cancel_async(acb);
4634 while (acb->refcnt > 1) {
4635 if (acb->aiocb_info->get_aio_context) {
4636 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
4637 } else if (acb->bs) {
4638 aio_poll(bdrv_get_aio_context(acb->bs), true);
4639 } else {
4640 abort();
Fam Zheng02c50ef2014-09-11 13:41:09 +08004641 }
Fam Zheng02c50ef2014-09-11 13:41:09 +08004642 }
Fam Zheng80074292014-09-11 13:41:28 +08004643 qemu_aio_unref(acb);
Fam Zheng02c50ef2014-09-11 13:41:09 +08004644}
4645
/* Async version of aio cancel. The caller is not blocked if the acb implements
 * cancel_async, otherwise we do nothing and let the request normally complete.
 * In either case the completion callback must be called. */
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
    if (acb->aiocb_info->cancel_async) {
        acb->aiocb_info->cancel_async(acb);
    }
}
4655
4656/**************************************************************/
4657/* async block device emulation */
4658
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004659typedef struct BlockAIOCBSync {
4660 BlockAIOCB common;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004661 QEMUBH *bh;
4662 int ret;
4663 /* vector translation state */
4664 QEMUIOVector *qiov;
4665 uint8_t *bounce;
4666 int is_write;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004667} BlockAIOCBSync;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004668
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004669static const AIOCBInfo bdrv_em_aiocb_info = {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004670 .aiocb_size = sizeof(BlockAIOCBSync),
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004671};
4672
bellard83f64092006-08-01 16:21:11 +00004673static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004674{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004675 BlockAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004676
Kevin Wolf857d4f42014-05-20 13:16:51 +02004677 if (!acb->is_write && acb->ret >= 0) {
Michael Tokarev03396142012-06-07 20:17:55 +04004678 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
Kevin Wolf857d4f42014-05-20 13:16:51 +02004679 }
aliguoriceb42de2009-04-07 18:43:28 +00004680 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004681 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004682 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004683 acb->bh = NULL;
Fam Zheng80074292014-09-11 13:41:28 +08004684 qemu_aio_unref(acb);
bellardbeac80c2006-06-26 20:08:57 +00004685}
bellardbeac80c2006-06-26 20:08:57 +00004686
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004687static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4688 int64_t sector_num,
4689 QEMUIOVector *qiov,
4690 int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004691 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004692 void *opaque,
4693 int is_write)
aliguorif141eaf2009-04-07 18:43:24 +00004694
bellardea2384d2004-08-01 21:59:26 +00004695{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004696 BlockAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004697
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004698 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004699 acb->is_write = is_write;
4700 acb->qiov = qiov;
Kevin Wolf857d4f42014-05-20 13:16:51 +02004701 acb->bounce = qemu_try_blockalign(bs, qiov->size);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004702 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004703
Kevin Wolf857d4f42014-05-20 13:16:51 +02004704 if (acb->bounce == NULL) {
4705 acb->ret = -ENOMEM;
4706 } else if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004707 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004708 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004709 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004710 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004711 }
4712
pbrookce1a14d2006-08-07 02:38:06 +00004713 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004714
pbrookce1a14d2006-08-07 02:38:06 +00004715 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004716}
4717
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004718static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004719 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004720 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004721{
aliguorif141eaf2009-04-07 18:43:24 +00004722 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004723}
4724
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004725static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004726 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004727 BlockCompletionFunc *cb, void *opaque)
aliguorif141eaf2009-04-07 18:43:24 +00004728{
4729 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4730}
4731
Kevin Wolf68485422011-06-30 10:05:46 +02004732
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004733typedef struct BlockAIOCBCoroutine {
4734 BlockAIOCB common;
Kevin Wolf68485422011-06-30 10:05:46 +02004735 BlockRequest req;
4736 bool is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004737 bool *done;
Kevin Wolf68485422011-06-30 10:05:46 +02004738 QEMUBH* bh;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004739} BlockAIOCBCoroutine;
Kevin Wolf68485422011-06-30 10:05:46 +02004740
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004741static const AIOCBInfo bdrv_em_co_aiocb_info = {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004742 .aiocb_size = sizeof(BlockAIOCBCoroutine),
Kevin Wolf68485422011-06-30 10:05:46 +02004743};
4744
Paolo Bonzini35246a62011-10-14 10:41:29 +02004745static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004746{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004747 BlockAIOCBCoroutine *acb = opaque;
Kevin Wolf68485422011-06-30 10:05:46 +02004748
4749 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004750
Kevin Wolf68485422011-06-30 10:05:46 +02004751 qemu_bh_delete(acb->bh);
Fam Zheng80074292014-09-11 13:41:28 +08004752 qemu_aio_unref(acb);
Kevin Wolf68485422011-06-30 10:05:46 +02004753}
4754
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004755/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4756static void coroutine_fn bdrv_co_do_rw(void *opaque)
4757{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004758 BlockAIOCBCoroutine *acb = opaque;
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004759 BlockDriverState *bs = acb->common.bs;
4760
4761 if (!acb->is_write) {
4762 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004763 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004764 } else {
4765 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004766 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004767 }
4768
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004769 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004770 qemu_bh_schedule(acb->bh);
4771}
4772
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004773static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4774 int64_t sector_num,
4775 QEMUIOVector *qiov,
4776 int nb_sectors,
4777 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004778 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004779 void *opaque,
4780 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004781{
4782 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004783 BlockAIOCBCoroutine *acb;
Kevin Wolf68485422011-06-30 10:05:46 +02004784
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004785 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004786 acb->req.sector = sector_num;
4787 acb->req.nb_sectors = nb_sectors;
4788 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004789 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004790 acb->is_write = is_write;
4791
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004792 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004793 qemu_coroutine_enter(co, acb);
4794
4795 return &acb->common;
4796}
4797
Paolo Bonzini07f07612011-10-17 12:32:12 +02004798static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004799{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004800 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004801 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004802
Paolo Bonzini07f07612011-10-17 12:32:12 +02004803 acb->req.error = bdrv_co_flush(bs);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004804 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004805 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004806}
4807
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004808BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004809 BlockCompletionFunc *cb, void *opaque)
Alexander Graf016f5cf2010-05-26 17:51:49 +02004810{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004811 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004812
Paolo Bonzini07f07612011-10-17 12:32:12 +02004813 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004814 BlockAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004815
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004816 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004817
Paolo Bonzini07f07612011-10-17 12:32:12 +02004818 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4819 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004820
Alexander Graf016f5cf2010-05-26 17:51:49 +02004821 return &acb->common;
4822}
4823
Paolo Bonzini4265d622011-10-17 12:32:14 +02004824static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4825{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004826 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004827 BlockDriverState *bs = acb->common.bs;
4828
4829 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004830 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004831 qemu_bh_schedule(acb->bh);
4832}
4833
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004834BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
Paolo Bonzini4265d622011-10-17 12:32:14 +02004835 int64_t sector_num, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004836 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzini4265d622011-10-17 12:32:14 +02004837{
4838 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004839 BlockAIOCBCoroutine *acb;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004840
4841 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4842
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004843 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004844 acb->req.sector = sector_num;
4845 acb->req.nb_sectors = nb_sectors;
4846 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4847 qemu_coroutine_enter(co, acb);
4848
4849 return &acb->common;
4850}
4851
bellardea2384d2004-08-01 21:59:26 +00004852void bdrv_init(void)
4853{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004854 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004855}
pbrookce1a14d2006-08-07 02:38:06 +00004856
Markus Armbrustereb852012009-10-27 18:41:44 +01004857void bdrv_init_with_whitelist(void)
4858{
4859 use_bdrv_whitelist = 1;
4860 bdrv_init();
4861}
4862
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004863void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004864 BlockCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004865{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004866 BlockAIOCB *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004867
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004868 acb = g_slice_alloc(aiocb_info->aiocb_size);
4869 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004870 acb->bs = bs;
4871 acb->cb = cb;
4872 acb->opaque = opaque;
Fam Zhengf197fe22014-09-11 13:41:08 +08004873 acb->refcnt = 1;
pbrookce1a14d2006-08-07 02:38:06 +00004874 return acb;
4875}
4876
Fam Zhengf197fe22014-09-11 13:41:08 +08004877void qemu_aio_ref(void *p)
4878{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004879 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004880 acb->refcnt++;
4881}
4882
Fam Zheng80074292014-09-11 13:41:28 +08004883void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00004884{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004885 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004886 assert(acb->refcnt > 0);
4887 if (--acb->refcnt == 0) {
4888 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4889 }
pbrookce1a14d2006-08-07 02:38:06 +00004890}
bellard19cb3732006-08-19 11:45:59 +00004891
4892/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004893/* Coroutine block device emulation */
4894
/* Bridge between callback-style AIO and a waiting coroutine: the callback
 * stores the result and re-enters the coroutine that issued the request. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine to re-enter on completion */
    int ret;                /* result of the AIO request */
} CoroutineIOCompletion;

/* Completion callback that wakes up the coroutine recorded in @opaque. */
static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
4907
/* Emulate coroutine-style I/O on top of a driver's callback-style
 * bdrv_aio_readv/bdrv_aio_writev: submit the AIO request, yield until the
 * completion callback re-enters us, and return its result.
 * Returns -EIO if the driver failed to even create the request. */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    /* Suspend until bdrv_co_io_em_complete() wakes us with the result. */
    qemu_coroutine_yield();

    return co.ret;
}
4933
4934static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4935 int64_t sector_num, int nb_sectors,
4936 QEMUIOVector *iov)
4937{
4938 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4939}
4940
4941static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4942 int64_t sector_num, int nb_sectors,
4943 QEMUIOVector *iov)
4944{
4945 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4946}
4947
Paolo Bonzini07f07612011-10-17 12:32:12 +02004948static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004949{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004950 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004951
Paolo Bonzini07f07612011-10-17 12:32:12 +02004952 rwco->ret = bdrv_co_flush(rwco->bs);
4953}
4954
/* Flush @bs: first flush cached data to the OS, then (unless
 * BDRV_O_NO_FLUSH/cache=unsafe) force it to disk via whichever interface
 * the driver provides, and finally flush the underlying protocol layer.
 * Returns 0 on success (including when there is nothing to flush) or a
 * negative errno. */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* Nothing to do for missing, empty or read-only devices. */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Fall back to the callback-style AIO flush: submit it and yield
         * until bdrv_co_io_em_complete() wakes us with the result. */
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bdrv_co_flush(bs->file);
}
5017
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005018void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005019{
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005020 Error *local_err = NULL;
5021 int ret;
5022
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005023 if (!bs->drv) {
5024 return;
Anthony Liguori0f154232011-11-14 15:09:45 -06005025 }
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005026
Alexey Kardashevskiy7ea2d262014-10-09 13:50:46 +11005027 if (!(bs->open_flags & BDRV_O_INCOMING)) {
5028 return;
5029 }
5030 bs->open_flags &= ~BDRV_O_INCOMING;
5031
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005032 if (bs->drv->bdrv_invalidate_cache) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005033 bs->drv->bdrv_invalidate_cache(bs, &local_err);
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005034 } else if (bs->file) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005035 bdrv_invalidate_cache(bs->file, &local_err);
5036 }
5037 if (local_err) {
5038 error_propagate(errp, local_err);
5039 return;
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005040 }
5041
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005042 ret = refresh_total_sectors(bs, bs->total_sectors);
5043 if (ret < 0) {
5044 error_setg_errno(errp, -ret, "Could not refresh total sector count");
5045 return;
5046 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005047}
5048
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005049void bdrv_invalidate_cache_all(Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005050{
5051 BlockDriverState *bs;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005052 Error *local_err = NULL;
Anthony Liguori0f154232011-11-14 15:09:45 -06005053
Benoît Canetdc364f42014-01-23 21:31:32 +01005054 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005055 AioContext *aio_context = bdrv_get_aio_context(bs);
5056
5057 aio_context_acquire(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005058 bdrv_invalidate_cache(bs, &local_err);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005059 aio_context_release(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005060 if (local_err) {
5061 error_propagate(errp, local_err);
5062 return;
5063 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005064 }
5065}
5066
Paolo Bonzini07f07612011-10-17 12:32:12 +02005067int bdrv_flush(BlockDriverState *bs)
5068{
5069 Coroutine *co;
5070 RwCo rwco = {
5071 .bs = bs,
5072 .ret = NOT_DONE,
5073 };
5074
5075 if (qemu_in_coroutine()) {
5076 /* Fast-path if already in coroutine context */
5077 bdrv_flush_co_entry(&rwco);
5078 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005079 AioContext *aio_context = bdrv_get_aio_context(bs);
5080
Paolo Bonzini07f07612011-10-17 12:32:12 +02005081 co = qemu_coroutine_create(bdrv_flush_co_entry);
5082 qemu_coroutine_enter(co, &rwco);
5083 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005084 aio_poll(aio_context, true);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005085 }
5086 }
5087
5088 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005089}
5090
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005091typedef struct DiscardCo {
5092 BlockDriverState *bs;
5093 int64_t sector_num;
5094 int nb_sectors;
5095 int ret;
5096} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005097static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5098{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005099 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005100
5101 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5102}
5103
Peter Lieven6f14da52013-10-24 12:06:59 +02005104/* if no limit is specified in the BlockLimits use a default
5105 * of 32768 512-byte sectors (16 MiB) per request.
5106 */
5107#define MAX_DISCARD_DEFAULT 32768
5108
Paolo Bonzini4265d622011-10-17 12:32:14 +02005109int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5110 int nb_sectors)
5111{
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005112 int max_discard;
5113
Paolo Bonzini4265d622011-10-17 12:32:14 +02005114 if (!bs->drv) {
5115 return -ENOMEDIUM;
5116 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
5117 return -EIO;
5118 } else if (bs->read_only) {
5119 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005120 }
5121
Fam Zhenge4654d22013-11-13 18:29:43 +08005122 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005123
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005124 /* Do nothing if disabled. */
5125 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5126 return 0;
5127 }
5128
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005129 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005130 return 0;
5131 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005132
5133 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
5134 while (nb_sectors > 0) {
5135 int ret;
5136 int num = nb_sectors;
5137
5138 /* align request */
5139 if (bs->bl.discard_alignment &&
5140 num >= bs->bl.discard_alignment &&
5141 sector_num % bs->bl.discard_alignment) {
5142 if (num > bs->bl.discard_alignment) {
5143 num = bs->bl.discard_alignment;
5144 }
5145 num -= sector_num % bs->bl.discard_alignment;
5146 }
5147
5148 /* limit request size */
5149 if (num > max_discard) {
5150 num = max_discard;
5151 }
5152
5153 if (bs->drv->bdrv_co_discard) {
5154 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5155 } else {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005156 BlockAIOCB *acb;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005157 CoroutineIOCompletion co = {
5158 .coroutine = qemu_coroutine_self(),
5159 };
5160
5161 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5162 bdrv_co_io_em_complete, &co);
5163 if (acb == NULL) {
5164 return -EIO;
5165 } else {
5166 qemu_coroutine_yield();
5167 ret = co.ret;
5168 }
5169 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005170 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005171 return ret;
5172 }
5173
5174 sector_num += num;
5175 nb_sectors -= num;
5176 }
5177 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005178}
5179
5180int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5181{
5182 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005183 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005184 .bs = bs,
5185 .sector_num = sector_num,
5186 .nb_sectors = nb_sectors,
5187 .ret = NOT_DONE,
5188 };
5189
5190 if (qemu_in_coroutine()) {
5191 /* Fast-path if already in coroutine context */
5192 bdrv_discard_co_entry(&rwco);
5193 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005194 AioContext *aio_context = bdrv_get_aio_context(bs);
5195
Paolo Bonzini4265d622011-10-17 12:32:14 +02005196 co = qemu_coroutine_create(bdrv_discard_co_entry);
5197 qemu_coroutine_enter(co, &rwco);
5198 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005199 aio_poll(aio_context, true);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005200 }
5201 }
5202
5203 return rwco.ret;
5204}
5205
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005206/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005207/* removable device support */
5208
5209/**
5210 * Return TRUE if the media is present
5211 */
5212int bdrv_is_inserted(BlockDriverState *bs)
5213{
5214 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005215
bellard19cb3732006-08-19 11:45:59 +00005216 if (!drv)
5217 return 0;
5218 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005219 return 1;
5220 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005221}
5222
5223/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005224 * Return whether the media changed since the last call to this
5225 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005226 */
5227int bdrv_media_changed(BlockDriverState *bs)
5228{
5229 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005230
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005231 if (drv && drv->bdrv_media_changed) {
5232 return drv->bdrv_media_changed(bs);
5233 }
5234 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005235}
5236
5237/**
5238 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5239 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005240void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005241{
5242 BlockDriver *drv = bs->drv;
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005243 const char *device_name;
bellard19cb3732006-08-19 11:45:59 +00005244
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005245 if (drv && drv->bdrv_eject) {
5246 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005247 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005248
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005249 device_name = bdrv_get_device_name(bs);
5250 if (device_name[0] != '\0') {
5251 qapi_event_send_device_tray_moved(device_name,
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02005252 eject_flag, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005253 }
bellard19cb3732006-08-19 11:45:59 +00005254}
5255
bellard19cb3732006-08-19 11:45:59 +00005256/**
5257 * Lock or unlock the media (if it is locked, the user won't be able
5258 * to eject it manually).
5259 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005260void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005261{
5262 BlockDriver *drv = bs->drv;
5263
Markus Armbruster025e8492011-09-06 18:58:47 +02005264 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005265
Markus Armbruster025e8492011-09-06 18:58:47 +02005266 if (drv && drv->bdrv_lock_medium) {
5267 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005268 }
5269}
ths985a03b2007-12-24 16:10:43 +00005270
5271/* needed for generic scsi interface */
5272
5273int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5274{
5275 BlockDriver *drv = bs->drv;
5276
5277 if (drv && drv->bdrv_ioctl)
5278 return drv->bdrv_ioctl(bs, req, buf);
5279 return -ENOTSUP;
5280}
aliguori7d780662009-03-12 19:57:08 +00005281
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005282BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
aliguori221f7152009-03-28 17:28:41 +00005283 unsigned long int req, void *buf,
Markus Armbruster097310b2014-10-07 13:59:15 +02005284 BlockCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005285{
aliguori221f7152009-03-28 17:28:41 +00005286 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005287
aliguori221f7152009-03-28 17:28:41 +00005288 if (drv && drv->bdrv_aio_ioctl)
5289 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5290 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005291}
aliguorie268ca52009-04-22 20:20:00 +00005292
/* Record the block size (bytes) the guest device uses for this BDS. */
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
    bs->guest_block_size = align;
}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005297
/* Allocate a buffer aligned for I/O on bs; qemu_memalign() aborts on OOM. */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005302
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005303void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5304{
5305 size_t align = bdrv_opt_mem_align(bs);
5306
5307 /* Ensure that NULL is never returned on success */
5308 assert(align > 0);
5309 if (size == 0) {
5310 size = align;
5311 }
5312
5313 return qemu_try_memalign(align, size);
5314}
5315
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005316/*
5317 * Check if all memory in this vector is sector aligned.
5318 */
5319bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5320{
5321 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005322 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005323
5324 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005325 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005326 return false;
5327 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005328 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005329 return false;
5330 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005331 }
5332
5333 return true;
5334}
5335
/*
 * Create a dirty bitmap tracking writes to bs.
 *
 * @granularity: bytes per bit; must be a power of two and at least
 *               BDRV_SECTOR_SIZE (asserted via the shift below).
 * Returns the new bitmap, or NULL (with errp and errno set) when the
 * device length cannot be determined.
 */
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
                                          Error **errp)
{
    int64_t bitmap_size;
    BdrvDirtyBitmap *bitmap;

    /* granularity must be a power of two */
    assert((granularity & (granularity - 1)) == 0);

    /* convert byte granularity to a sector count */
    granularity >>= BDRV_SECTOR_BITS;
    assert(granularity);
    bitmap_size = bdrv_nb_sectors(bs);
    if (bitmap_size < 0) {
        error_setg_errno(errp, -bitmap_size, "could not get length of device");
        /* also expose the error through errno for callers that check it */
        errno = -bitmap_size;
        return NULL;
    }
    bitmap = g_new0(BdrvDirtyBitmap, 1);
    /* hbitmap wants log2 of the sector granularity */
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
    return bitmap;
}
5357
5358void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5359{
5360 BdrvDirtyBitmap *bm, *next;
5361 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5362 if (bm == bitmap) {
5363 QLIST_REMOVE(bitmap, list);
5364 hbitmap_free(bitmap->bitmap);
5365 g_free(bitmap);
5366 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005367 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005368 }
5369}
5370
Fam Zheng21b56832013-11-13 18:29:44 +08005371BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5372{
5373 BdrvDirtyBitmap *bm;
5374 BlockDirtyInfoList *list = NULL;
5375 BlockDirtyInfoList **plist = &list;
5376
5377 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
Markus Armbruster5839e532014-08-19 10:31:08 +02005378 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5379 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
Fam Zheng21b56832013-11-13 18:29:44 +08005380 info->count = bdrv_get_dirty_count(bs, bm);
5381 info->granularity =
5382 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5383 entry->value = info;
5384 *plist = entry;
5385 plist = &entry->next;
5386 }
5387
5388 return list;
5389}
5390
Fam Zhenge4654d22013-11-13 18:29:43 +08005391int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005392{
Fam Zhenge4654d22013-11-13 18:29:43 +08005393 if (bitmap) {
5394 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005395 } else {
5396 return 0;
5397 }
5398}
5399
/* Initialize *hbi to iterate over 'bitmap' starting at sector 0. */
void bdrv_dirty_iter_init(BlockDriverState *bs,
                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
5405
/* Mark [cur_sector, cur_sector + nr_sectors) dirty in every bitmap of bs. */
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                    int nr_sectors)
{
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
    }
}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005414
/* Clear [cur_sector, cur_sector + nr_sectors) in every bitmap of bs. */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
{
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
    }
}
5422
/* Return the number of dirty entries recorded in 'bitmap' (hbitmap count). */
int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
5427
/* Get a reference to bs.  Unlike bdrv_unref(), bs must not be NULL here. */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
5433
/* Release a previously grabbed reference to bs.
 * If after releasing, reference count is zero, the BlockDriverState is
 * deleted.  A NULL bs is accepted and is a no-op. */
void bdrv_unref(BlockDriverState *bs)
{
    if (!bs) {
        return;
    }
    assert(bs->refcnt > 0);
    if (--bs->refcnt == 0) {
        bdrv_delete(bs);
    }
}
5447
/* One reason why a given operation type is currently blocked on a BDS. */
struct BdrvOpBlocker {
    Error *reason;                    /* owned by the bdrv_op_block() caller */
    QLIST_ENTRY(BdrvOpBlocker) list;  /* links bs->op_blockers[op] */
};
5452
5453bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5454{
5455 BdrvOpBlocker *blocker;
5456 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5457 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5458 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5459 if (errp) {
5460 error_setg(errp, "Device '%s' is busy: %s",
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005461 bdrv_get_device_name(bs),
5462 error_get_pretty(blocker->reason));
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005463 }
5464 return true;
5465 }
5466 return false;
5467}
5468
/*
 * Install 'reason' as a blocker for operation type 'op' on bs.  The Error
 * is referenced, not copied: bdrv_op_unblock() matches it by pointer and
 * does not free it, so the caller retains ownership.
 */
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    blocker = g_new0(BdrvOpBlocker, 1);
    blocker->reason = reason;
    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
}
5478
/*
 * Remove every blocker on 'op' whose reason pointer equals 'reason'.
 * Only the blocker nodes are freed; the Error stays owned by the caller
 * of bdrv_op_block().
 */
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker, *next;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
        if (blocker->reason == reason) {
            QLIST_REMOVE(blocker, list);
            g_free(blocker);
        }
    }
}
5490
/* Install 'reason' as a blocker for every operation type on bs. */
void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_block(bs, i, reason);
    }
}
5498
/* Remove 'reason' as a blocker from every operation type on bs. */
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_unblock(bs, i, reason);
    }
}
5506
/* Return true when no operation type on bs has any blocker installed. */
bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
{
    int i;

    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        if (!QLIST_EMPTY(&bs->op_blockers[i])) {
            return false;
        }
    }
    return true;
}
5518
/* Enable I/O status tracking for bs and reset the status to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
5524
5525/* The I/O status is only enabled if the drive explicitly
5526 * enables it _and_ the VM is configured to stop on errors */
5527bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5528{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005529 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005530 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5531 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5532 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005533}
5534
/* Stop tracking I/O status for bs (the last recorded status is kept). */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
5539
/*
 * Reset the I/O status of bs (and of its block job, if any) back to OK.
 * No-op unless iostatus tracking is enabled for this device.
 */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}
5549
Luiz Capitulino28a72822011-09-26 17:43:50 -03005550void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5551{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005552 assert(bdrv_iostatus_is_enabled(bs));
5553 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005554 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5555 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005556 }
5557}
5558
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005559void bdrv_img_create(const char *filename, const char *fmt,
5560 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005561 char *options, uint64_t img_size, int flags,
5562 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005563{
Chunyan Liu83d05212014-06-05 17:20:51 +08005564 QemuOptsList *create_opts = NULL;
5565 QemuOpts *opts = NULL;
5566 const char *backing_fmt, *backing_file;
5567 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005568 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005569 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005570 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005571 int ret = 0;
5572
5573 /* Find driver and parse its options */
5574 drv = bdrv_find_format(fmt);
5575 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005576 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005577 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005578 }
5579
Kevin Wolf98289622013-07-10 15:47:39 +02005580 proto_drv = bdrv_find_protocol(filename, true);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005581 if (!proto_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005582 error_setg(errp, "Unknown protocol '%s'", filename);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005583 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005584 }
5585
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005586 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5587 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005588
5589 /* Create parameter list with default values */
Chunyan Liu83d05212014-06-05 17:20:51 +08005590 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5591 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005592
5593 /* Parse -o options */
5594 if (options) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005595 if (qemu_opts_do_parse(opts, options, NULL) != 0) {
5596 error_setg(errp, "Invalid options for file format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005597 goto out;
5598 }
5599 }
5600
5601 if (base_filename) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005602 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005603 error_setg(errp, "Backing file not supported for file format '%s'",
5604 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005605 goto out;
5606 }
5607 }
5608
5609 if (base_fmt) {
Chunyan Liu83d05212014-06-05 17:20:51 +08005610 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005611 error_setg(errp, "Backing file format not supported for file "
5612 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005613 goto out;
5614 }
5615 }
5616
Chunyan Liu83d05212014-06-05 17:20:51 +08005617 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5618 if (backing_file) {
5619 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005620 error_setg(errp, "Error: Trying to create an image with the "
5621 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005622 goto out;
5623 }
5624 }
5625
Chunyan Liu83d05212014-06-05 17:20:51 +08005626 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5627 if (backing_fmt) {
5628 backing_drv = bdrv_find_format(backing_fmt);
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005629 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005630 error_setg(errp, "Unknown backing file format '%s'",
Chunyan Liu83d05212014-06-05 17:20:51 +08005631 backing_fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005632 goto out;
5633 }
5634 }
5635
5636 // The size for the image must always be specified, with one exception:
5637 // If we are using a backing file, we can obtain the size from there
Chunyan Liu83d05212014-06-05 17:20:51 +08005638 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5639 if (size == -1) {
5640 if (backing_file) {
Max Reitz66f6b812013-12-03 14:57:52 +01005641 BlockDriverState *bs;
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005642 int64_t size;
Paolo Bonzini63090da2012-04-12 14:01:03 +02005643 int back_flags;
5644
5645 /* backing files always opened read-only */
5646 back_flags =
5647 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005648
Max Reitzf67503e2014-02-18 18:33:05 +01005649 bs = NULL;
Chunyan Liu83d05212014-06-05 17:20:51 +08005650 ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005651 backing_drv, &local_err);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005652 if (ret < 0) {
Max Reitzcc84d902013-09-06 17:14:26 +02005653 error_setg_errno(errp, -ret, "Could not open '%s': %s",
Chunyan Liu83d05212014-06-05 17:20:51 +08005654 backing_file,
Max Reitzcc84d902013-09-06 17:14:26 +02005655 error_get_pretty(local_err));
5656 error_free(local_err);
5657 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005658 goto out;
5659 }
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005660 size = bdrv_getlength(bs);
5661 if (size < 0) {
5662 error_setg_errno(errp, -size, "Could not get size of '%s'",
5663 backing_file);
5664 bdrv_unref(bs);
5665 goto out;
5666 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005667
Chunyan Liu83d05212014-06-05 17:20:51 +08005668 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
Max Reitz66f6b812013-12-03 14:57:52 +01005669
5670 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005671 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005672 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005673 goto out;
5674 }
5675 }
5676
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005677 if (!quiet) {
5678 printf("Formatting '%s', fmt=%s ", filename, fmt);
Chunyan Liu83d05212014-06-05 17:20:51 +08005679 qemu_opts_print(opts);
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005680 puts("");
5681 }
Chunyan Liu83d05212014-06-05 17:20:51 +08005682
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005683 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08005684
Max Reitzcc84d902013-09-06 17:14:26 +02005685 if (ret == -EFBIG) {
5686 /* This is generally a better message than whatever the driver would
5687 * deliver (especially because of the cluster_size_hint), since that
5688 * is most probably not much different from "image too large". */
5689 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08005690 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02005691 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005692 }
Max Reitzcc84d902013-09-06 17:14:26 +02005693 error_setg(errp, "The image size is too large for file format '%s'"
5694 "%s", fmt, cluster_size_hint);
5695 error_free(local_err);
5696 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005697 }
5698
5699out:
Chunyan Liu83d05212014-06-05 17:20:51 +08005700 qemu_opts_del(opts);
5701 qemu_opts_free(create_opts);
Markus Armbruster84d18f02014-01-30 15:07:28 +01005702 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005703 error_propagate(errp, local_err);
5704 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005705}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005706
/* Return the AioContext this BDS is currently bound to. */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
5711
/*
 * Unbind bs from its current AioContext.
 *
 * Order matters and is the mirror image of bdrv_attach_aio_context():
 * external notifiers run first, then I/O throttling, the driver's own
 * hook, and the bs->file / bs->backing_hd children; the aio_context
 * pointer is cleared last.  No-op when bs has no driver.
 */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf;

    if (!bs->drv) {
        return;
    }

    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
        baf->detach_aio_context(baf->opaque);
    }

    if (bs->io_limits_enabled) {
        throttle_detach_aio_context(&bs->throttle_state);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file);
    }
    if (bs->backing_hd) {
        bdrv_detach_aio_context(bs->backing_hd);
    }

    bs->aio_context = NULL;
}
5739
/*
 * Bind bs to new_context.
 *
 * Order is the reverse of bdrv_detach_aio_context(): the aio_context
 * pointer is set first, then the children, the driver hook and I/O
 * throttling are attached, and external notifiers run last.  No-op when
 * bs has no driver.
 */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *ban;

    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing_hd) {
        bdrv_attach_aio_context(bs->backing_hd, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    if (bs->io_limits_enabled) {
        throttle_attach_aio_context(&bs->throttle_state, new_context);
    }

    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
        ban->attached_aio_context(new_context, ban->opaque);
    }
}
5768
/*
 * Move bs to a different AioContext.  Runs in the old context's thread
 * (see the comment below); all in-flight requests are drained first.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005782
Max Reitz33384422014-06-20 21:57:33 +02005783void bdrv_add_aio_context_notifier(BlockDriverState *bs,
5784 void (*attached_aio_context)(AioContext *new_context, void *opaque),
5785 void (*detach_aio_context)(void *opaque), void *opaque)
5786{
5787 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
5788 *ban = (BdrvAioNotifier){
5789 .attached_aio_context = attached_aio_context,
5790 .detach_aio_context = detach_aio_context,
5791 .opaque = opaque
5792 };
5793
5794 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
5795}
5796
5797void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
5798 void (*attached_aio_context)(AioContext *,
5799 void *),
5800 void (*detach_aio_context)(void *),
5801 void *opaque)
5802{
5803 BdrvAioNotifier *ban, *ban_next;
5804
5805 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5806 if (ban->attached_aio_context == attached_aio_context &&
5807 ban->detach_aio_context == detach_aio_context &&
5808 ban->opaque == opaque)
5809 {
5810 QLIST_REMOVE(ban, list);
5811 g_free(ban);
5812
5813 return;
5814 }
5815 }
5816
5817 abort();
5818}
5819
/* Add 'notifier' to the list consulted before writes to bs
 * (bs->before_write_notifiers). */
void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
{
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}
Max Reitz6f176b42013-09-03 10:09:50 +02005825
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005826int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
Max Reitz6f176b42013-09-03 10:09:50 +02005827{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005828 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02005829 return -ENOTSUP;
5830 }
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005831 return bs->drv->bdrv_amend_options(bs, opts);
Max Reitz6f176b42013-09-03 10:09:50 +02005832}
Benoît Canetf6186f42013-10-02 14:33:48 +02005833
/* This function will be called by the bdrv_recurse_is_first_non_filter method
 * of block filters and by bdrv_is_first_non_filter.
 * It is used to test whether the given bs is the candidate or to recurse
 * further into the node graph.
 * Returns true iff 'candidate' is reachable as the first non-filter node.
 */
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
                                      BlockDriverState *candidate)
{
    /* return false if basic checks fail */
    if (!bs || !bs->drv) {
        return false;
    }

    /* the code reached a non block filter driver -> check if the bs is
     * the same as the candidate. It's the recursion termination condition.
     */
    if (!bs->drv->is_filter) {
        return bs == candidate;
    }
    /* Down this path the driver is a block filter driver */

    /* If the block filter recursion method is defined use it to recurse down
     * the node graph.
     */
    if (bs->drv->bdrv_recurse_is_first_non_filter) {
        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
    }

    /* the driver is a block filter but doesn't allow recursion -> return
     * false
     */
    return false;
}
5866
5867/* This function checks if the candidate is the first non filter bs down it's
5868 * bs chain. Since we don't have pointers to parents it explore all bs chains
5869 * from the top. Some filters can choose not to pass down the recursion.
5870 */
5871bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5872{
5873 BlockDriverState *bs;
5874
5875 /* walk down the bs forest recursively */
5876 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5877 bool perm;
5878
Benoît Canetb5042a32014-03-03 19:11:34 +01005879 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01005880 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01005881
5882 /* candidate is the first non filter */
5883 if (perm) {
5884 return true;
5885 }
5886 }
5887
5888 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005889}
Benoît Canet09158f02014-06-27 18:25:25 +02005890
5891BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
5892{
5893 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5894 if (!to_replace_bs) {
5895 error_setg(errp, "Node name '%s' not found", node_name);
5896 return NULL;
5897 }
5898
5899 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5900 return NULL;
5901 }
5902
5903 /* We don't want arbitrary node of the BDS chain to be replaced only the top
5904 * most non filter in order to prevent data corruption.
5905 * Another benefit is that this tests exclude backing files which are
5906 * blocked by the backing blockers.
5907 */
5908 if (!bdrv_is_first_non_filter(to_replace_bs)) {
5909 error_setg(errp, "Only top most non filter can be replaced");
5910 return NULL;
5911 }
5912
5913 return to_replace_bs;
5914}
Ming Lei448ad912014-07-04 18:04:33 +08005915
5916void bdrv_io_plug(BlockDriverState *bs)
5917{
5918 BlockDriver *drv = bs->drv;
5919 if (drv && drv->bdrv_io_plug) {
5920 drv->bdrv_io_plug(bs);
5921 } else if (bs->file) {
5922 bdrv_io_plug(bs->file);
5923 }
5924}
5925
5926void bdrv_io_unplug(BlockDriverState *bs)
5927{
5928 BlockDriver *drv = bs->drv;
5929 if (drv && drv->bdrv_io_unplug) {
5930 drv->bdrv_io_unplug(bs);
5931 } else if (bs->file) {
5932 bdrv_io_unplug(bs->file);
5933 }
5934}
5935
5936void bdrv_flush_io_queue(BlockDriverState *bs)
5937{
5938 BlockDriver *drv = bs->drv;
5939 if (drv && drv->bdrv_flush_io_queue) {
5940 drv->bdrv_flush_io_queue(bs);
5941 } else if (bs->file) {
5942 bdrv_flush_io_queue(bs->file);
5943 }
5944}
Max Reitz91af7012014-07-18 20:24:56 +02005945
5946static bool append_open_options(QDict *d, BlockDriverState *bs)
5947{
5948 const QDictEntry *entry;
5949 bool found_any = false;
5950
5951 for (entry = qdict_first(bs->options); entry;
5952 entry = qdict_next(bs->options, entry))
5953 {
5954 /* Only take options for this level and exclude all non-driver-specific
5955 * options */
5956 if (!strchr(qdict_entry_key(entry), '.') &&
5957 strcmp(qdict_entry_key(entry), "node-name"))
5958 {
5959 qobject_incref(qdict_entry_value(entry));
5960 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
5961 found_any = true;
5962 }
5963 }
5964
5965 return found_any;
5966}
5967
5968/* Updates the following BDS fields:
5969 * - exact_filename: A filename which may be used for opening a block device
5970 * which (mostly) equals the given BDS (even without any
5971 * other options; so reading and writing must return the same
5972 * results, but caching etc. may be different)
5973 * - full_open_options: Options which, when given when opening a block device
5974 * (without a filename), result in a BDS (mostly)
5975 * equalling the given one
5976 * - filename: If exact_filename is set, it is copied here. Otherwise,
5977 * full_open_options is converted to a JSON object, prefixed with
5978 * "json:" (for use through the JSON pseudo protocol) and put here.
5979 */
5980void bdrv_refresh_filename(BlockDriverState *bs)
5981{
5982 BlockDriver *drv = bs->drv;
5983 QDict *opts;
5984
5985 if (!drv) {
5986 return;
5987 }
5988
5989 /* This BDS's file name will most probably depend on its file's name, so
5990 * refresh that first */
5991 if (bs->file) {
5992 bdrv_refresh_filename(bs->file);
5993 }
5994
5995 if (drv->bdrv_refresh_filename) {
5996 /* Obsolete information is of no use here, so drop the old file name
5997 * information before refreshing it */
5998 bs->exact_filename[0] = '\0';
5999 if (bs->full_open_options) {
6000 QDECREF(bs->full_open_options);
6001 bs->full_open_options = NULL;
6002 }
6003
6004 drv->bdrv_refresh_filename(bs);
6005 } else if (bs->file) {
6006 /* Try to reconstruct valid information from the underlying file */
6007 bool has_open_options;
6008
6009 bs->exact_filename[0] = '\0';
6010 if (bs->full_open_options) {
6011 QDECREF(bs->full_open_options);
6012 bs->full_open_options = NULL;
6013 }
6014
6015 opts = qdict_new();
6016 has_open_options = append_open_options(opts, bs);
6017
6018 /* If no specific options have been given for this BDS, the filename of
6019 * the underlying file should suffice for this one as well */
6020 if (bs->file->exact_filename[0] && !has_open_options) {
6021 strcpy(bs->exact_filename, bs->file->exact_filename);
6022 }
6023 /* Reconstructing the full options QDict is simple for most format block
6024 * drivers, as long as the full options are known for the underlying
6025 * file BDS. The full options QDict of that file BDS should somehow
6026 * contain a representation of the filename, therefore the following
6027 * suffices without querying the (exact_)filename of this BDS. */
6028 if (bs->file->full_open_options) {
6029 qdict_put_obj(opts, "driver",
6030 QOBJECT(qstring_from_str(drv->format_name)));
6031 QINCREF(bs->file->full_open_options);
6032 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
6033
6034 bs->full_open_options = opts;
6035 } else {
6036 QDECREF(opts);
6037 }
6038 } else if (!bs->full_open_options && qdict_size(bs->options)) {
6039 /* There is no underlying file BDS (at least referenced by BDS.file),
6040 * so the full options QDict should be equal to the options given
6041 * specifically for this block device when it was opened (plus the
6042 * driver specification).
6043 * Because those options don't change, there is no need to update
6044 * full_open_options when it's already set. */
6045
6046 opts = qdict_new();
6047 append_open_options(opts, bs);
6048 qdict_put_obj(opts, "driver",
6049 QOBJECT(qstring_from_str(drv->format_name)));
6050
6051 if (bs->exact_filename[0]) {
6052 /* This may not work for all block protocol drivers (some may
6053 * require this filename to be parsed), but we have to find some
6054 * default solution here, so just include it. If some block driver
6055 * does not support pure options without any filename at all or
6056 * needs some special format of the options QDict, it needs to
6057 * implement the driver-specific bdrv_refresh_filename() function.
6058 */
6059 qdict_put_obj(opts, "filename",
6060 QOBJECT(qstring_from_str(bs->exact_filename)));
6061 }
6062
6063 bs->full_open_options = opts;
6064 }
6065
6066 if (bs->exact_filename[0]) {
6067 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
6068 } else if (bs->full_open_options) {
6069 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
6070 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
6071 qstring_get_str(json));
6072 QDECREF(json);
6073 }
6074}
Benoît Canet5366d0c2014-09-05 15:46:18 +02006075
6076/* This accessor function purpose is to allow the device models to access the
6077 * BlockAcctStats structure embedded inside a BlockDriverState without being
6078 * aware of the BlockDriverState structure layout.
6079 * It will go away when the BlockAcctStats structure will be moved inside
6080 * the device models.
6081 */
6082BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6083{
6084 return &bs->stats;
6085}