blob: 0e635245561e3398a91c8658863f4b498e6b2e5f [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010027#include "block/block_int.h"
28#include "block/blockjob.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010029#include "qemu/module.h"
Paolo Bonzini7b1b5d12012-12-17 18:19:43 +010030#include "qapi/qmp/qjson.h"
Markus Armbrusterbfb197e2014-10-07 13:59:11 +020031#include "sysemu/block-backend.h"
Paolo Bonzini9c17d612012-12-17 18:20:04 +010032#include "sysemu/sysemu.h"
Fam Zhengde50a202015-03-25 15:27:26 +080033#include "sysemu/qtest.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010034#include "qemu/notify.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010035#include "block/coroutine.h"
Benoît Canetc13163f2014-01-23 21:31:34 +010036#include "block/qapi.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030037#include "qmp-commands.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010038#include "qemu/timer.h"
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +020039#include "qapi-event.h"
bellardfc01f7e2003-06-30 10:03:06 +000040
Juan Quintela71e72a12009-07-27 16:12:56 +020041#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000042#include <sys/types.h>
43#include <sys/stat.h>
44#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000045#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000046#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000047#include <sys/disk.h>
48#endif
blueswir1c5e97232009-03-07 20:06:23 +000049#endif
bellard7674e7b2005-04-26 21:59:26 +000050
aliguori49dc7682009-03-08 16:26:59 +000051#ifdef _WIN32
52#include <windows.h>
53#endif
54
/* A dirty bitmap attached to a BlockDriverState; tracks written sectors
 * via an HBitmap.  Linked into bs->dirty_bitmaps (see bdrv_new()). */
struct BdrvDirtyBitmap {
    HBitmap *bitmap;    /* underlying bit array */
    char *name;         /* NOTE(review): presumably NULL for anonymous bitmaps — confirm */
    bool disabled;      /* NOTE(review): presumably suppresses dirty recording — confirm at use sites */
    QLIST_ENTRY(BdrvDirtyBitmap) list;  /* entry in the per-BDS bitmap list */
};
61
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +010062#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
63
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020064static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000065 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020066 BlockCompletionFunc *cb, void *opaque);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020067static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +000068 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +020069 BlockCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020070static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
73static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
74 int64_t sector_num, int nb_sectors,
75 QEMUIOVector *iov);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010076static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000078 BdrvRequestFlags flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010079static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
80 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000081 BdrvRequestFlags flags);
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020082static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
83 int64_t sector_num,
84 QEMUIOVector *qiov,
85 int nb_sectors,
86 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +020087 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +020088 void *opaque,
89 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010090static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +010091static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +020092 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellardec530c82006-04-25 22:36:06 +000093
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010094static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000096
Benoît Canetdc364f42014-01-23 21:31:32 +010097static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
98 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
99
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100100static QLIST_HEAD(, BlockDriver) bdrv_drivers =
101 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +0000102
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +0300103static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
104 int nr_sectors);
105static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
106 int nr_sectors);
Markus Armbrustereb852012009-10-27 18:41:44 +0100107/* If non-zero, use only whitelisted block drivers */
108static int use_bdrv_whitelist;
109
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000110#ifdef _WIN32
/* Return 1 if @filename begins with a DOS drive specifier: a single
 * ASCII letter immediately followed by ':'.  Only the first two
 * characters are examined. */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];

    /* check the letter first so filename[1] is never read past a
     * one-byte string */
    if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
        return 0;
    }
    return filename[1] == ':';
}
117
118int is_windows_drive(const char *filename)
119{
120 if (is_windows_drive_prefix(filename) &&
121 filename[2] == '\0')
122 return 1;
123 if (strstart(filename, "\\\\.\\", NULL) ||
124 strstart(filename, "//./", NULL))
125 return 1;
126 return 0;
127}
128#endif
129
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800130/* throttling disk I/O limits */
Benoît Canetcc0681c2013-09-02 14:14:39 +0200131void bdrv_set_io_limits(BlockDriverState *bs,
132 ThrottleConfig *cfg)
133{
134 int i;
135
136 throttle_config(&bs->throttle_state, cfg);
137
138 for (i = 0; i < 2; i++) {
139 qemu_co_enter_next(&bs->throttled_reqs[i]);
140 }
141}
142
143/* this function drain all the throttled IOs */
144static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
145{
146 bool drained = false;
147 bool enabled = bs->io_limits_enabled;
148 int i;
149
150 bs->io_limits_enabled = false;
151
152 for (i = 0; i < 2; i++) {
153 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
154 drained = true;
155 }
156 }
157
158 bs->io_limits_enabled = enabled;
159
160 return drained;
161}
162
/* Disable I/O throttling on @bs: flush any queued throttled requests,
 * then destroy the throttle state.  Counterpart of
 * bdrv_io_limits_enable(). */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    /* release everything still parked on the throttled queues */
    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}
171
/* Throttle timer callback for reads: release one coroutine from the
 * read queue (index 0).  @opaque is the BlockDriverState. */
static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}
177
/* Throttle timer callback for writes: release one coroutine from the
 * write queue (index 1).  @opaque is the BlockDriverState. */
static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}
183
/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    int clock_type = QEMU_CLOCK_REALTIME;

    if (qtest_enabled()) {
        /* For testing block IO throttling only */
        clock_type = QEMU_CLOCK_VIRTUAL;
    }
    /* enabling twice would leak/clobber the existing throttle state */
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  clock_type,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}
202
/* This function makes an IO wait if needed
 *
 * @bytes:    size of the IO in bytes
 * @is_write: is the IO a write (selects queue 1, reads use queue 0)
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* does this IO need to wait for its throttle timer? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or requests of this type are already queued,
     * park this coroutine too (preserves request ordering) */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the IO will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);


    /* if the next request must wait -> do nothing; its timer will
     * release it later */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}
233
Kevin Wolf339064d2013-11-28 10:23:32 +0100234size_t bdrv_opt_mem_align(BlockDriverState *bs)
235{
236 if (!bs || !bs->drv) {
237 /* 4k should be on the safe side */
238 return 4096;
239 }
240
241 return bs->bl.opt_mem_alignment;
242}
243
/* check if the path starts with "<protocol>:" */
int path_has_protocol(const char *path)
{
    const char *sep;

#ifdef _WIN32
    /* bare Windows drive names ("c:", "c:\...") are not protocols */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    sep = path + strcspn(path, ":/\\");
#else
    /* stop at the first ':' or path separator, whichever comes first */
    sep = path + strcspn(path, ":/");
#endif

    /* a protocol prefix means the ':' appears before any separator */
    return *sep == ':';
}
261
/* Return 1 if @path is absolute for the host's path conventions. */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return path[0] == '/' || path[0] == '\\';
#else
    return path[0] == '/';
#endif
}
274
/* if filename is absolute, just copy it to dest. Otherwise, build a
 * path to it by considering it is relative to base_path. URL are
 * supported.
 *
 * @dest/@dest_size: output buffer; result is truncated to fit and is
 *                   always NUL-terminated (no-op if dest_size <= 0).
 *
 * Fix: all single-statement control bodies are now braced, as required
 * by QEMU's coding style (avoids dangling-statement bugs on future
 * edits).  Logic is unchanged. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* skip an optional "<protocol>:" prefix of base_path */
    p = strchr(base_path, ':');
    if (p) {
        p++;
    } else {
        p = base_path;
    }

    /* find the character after the last path separator in base_path */
    p1 = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *p2;
        p2 = strrchr(base_path, '\\');
        if (!p1 || p2 > p1) {
            p1 = p2;
        }
    }
#endif
    if (p1) {
        p1++;
    } else {
        p1 = base_path;
    }
    if (p1 > p) {
        p = p1;
    }

    /* copy the directory part of base_path, then append filename */
    len = p - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
318
/* Resolve a backing file name relative to the image that references it.
 *
 * @backed:  filename of the image holding the backing reference
 * @backing: backing file name as recorded in that image
 * @dest/@sz: output buffer for the resolved name
 * @errp:    set when a relative backing name cannot be resolved
 *
 * Empty, protocol-prefixed, or absolute backing names are copied through
 * unchanged.  Relative names are combined with @backed's directory;
 * that is impossible when @backed is empty or a "json:" pseudo-filename,
 * so those cases report an error instead. */
void bdrv_get_full_backing_filename_from_filename(const char *backed,
                                                  const char *backing,
                                                  char *dest, size_t sz,
                                                  Error **errp)
{
    if (backing[0] == '\0' || path_has_protocol(backing) ||
        path_is_absolute(backing))
    {
        pstrcpy(dest, sz, backing);
    } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
        error_setg(errp, "Cannot use relative backing file names for '%s'",
                   backed);
    } else {
        path_combine(dest, sz, backed, backing);
    }
}
335
/* Convenience wrapper: resolve @bs's recorded backing file name against
 * @bs's own filename (preferring exact_filename when it is set). */
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
                                    Error **errp)
{
    char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;

    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
                                                 dest, sz, errp);
}
344
/* Register @bdrv in the global driver list, installing emulation
 * wrappers so every driver exposes both the coroutine and the AIO
 * read/write interfaces regardless of which one it implements. */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
bellardb3380822004-03-14 21:38:54 +0000364
/* Create a new BlockDriverState and link it into the global bdrv_states
 * list of device-level (root) states. */
BlockDriverState *bdrv_new_root(void)
{
    BlockDriverState *bs = bdrv_new();

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}
372
/* Allocate and initialise an anonymous BlockDriverState with refcount 1.
 * The new state is NOT linked into any global list (see bdrv_new_root()
 * for that) and starts on the main AioContext with iostatus disabled. */
BlockDriverState *bdrv_new(void)
{
    BlockDriverState *bs;
    int i;

    bs = g_new0(BlockDriverState, 1);
    QLIST_INIT(&bs->dirty_bitmaps);
    /* one blocker list per block-operation type */
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    /* throttled_reqs[0] = reads, throttled_reqs[1] = writes */
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();

    return bs;
}
393
/* Register @notify to be called when @bs is closed. */
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}
398
bellardea2384d2004-08-01 21:59:26 +0000399BlockDriver *bdrv_find_format(const char *format_name)
400{
401 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100402 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
403 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000404 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100405 }
bellardea2384d2004-08-01 21:59:26 +0000406 }
407 return NULL;
408}
409
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800410static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100411{
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800412 static const char *whitelist_rw[] = {
413 CONFIG_BDRV_RW_WHITELIST
414 };
415 static const char *whitelist_ro[] = {
416 CONFIG_BDRV_RO_WHITELIST
Markus Armbrustereb852012009-10-27 18:41:44 +0100417 };
418 const char **p;
419
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800420 if (!whitelist_rw[0] && !whitelist_ro[0]) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100421 return 1; /* no whitelist, anything goes */
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800422 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100423
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800424 for (p = whitelist_rw; *p; p++) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100425 if (!strcmp(drv->format_name, *p)) {
426 return 1;
427 }
428 }
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800429 if (read_only) {
430 for (p = whitelist_ro; *p; p++) {
431 if (!strcmp(drv->format_name, *p)) {
432 return 1;
433 }
434 }
435 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100436 return 0;
437}
438
/* Like bdrv_find_format(), but additionally require the driver to be
 * whitelisted for the requested access mode; NULL otherwise. */
BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}
445
/* State shared between bdrv_create() and its coroutine worker. */
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;   /* g_strdup'd copy, freed by bdrv_create() */
    QemuOpts *opts;
    int ret;          /* NOT_DONE while the coroutine is still running */
    Error *err;       /* error from the driver, owned by bdrv_create() */
} CreateCo;
453
/* Coroutine entry point for bdrv_create(): invoke the driver's
 * bdrv_create callback and publish result/error through the shared
 * CreateCo (setting cco->ret also signals completion). */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}
468
/* Create an image file @filename with driver @drv and options @opts.
 *
 * Runs the driver's create callback in coroutine context: directly when
 * already inside a coroutine, otherwise in a new coroutine while this
 * thread polls the main AioContext until completion.
 * Returns 0 on success or a negative errno value; @errp is set on error. */
int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        /* cco.ret leaves NOT_DONE only when the coroutine finishes */
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            /* driver failed without reporting a specific error */
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}
513
/* Create an image using the protocol driver inferred from @filename
 * (e.g. the "file" driver for plain paths).  Returns 0 on success or a
 * negative errno value; @errp is set on error. */
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true, errp);
    if (drv == NULL) {
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}
531
/* Recompute @bs's I/O limits (bs->bl): start from zero, inherit
 * defaults recursively from bs->file and bs->backing_hd, then let the
 * driver's own callback override the merged result. */
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        /* merge: largest optimum, smallest non-zero maximum */
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.max_transfer_length =
            MIN_NON_ZERO(bs->bl.max_transfer_length,
                         bs->backing_hd->bl.max_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}
579
Ekaterina Tumanova892b7de2015-02-16 12:47:54 +0100580/**
581 * Try to get @bs's logical and physical block size.
582 * On success, store them in @bsz struct and return 0.
583 * On failure return -errno.
584 * @bs must not be empty.
585 */
586int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
587{
588 BlockDriver *drv = bs->drv;
589
590 if (drv && drv->bdrv_probe_blocksizes) {
591 return drv->bdrv_probe_blocksizes(bs, bsz);
592 }
593
594 return -ENOTSUP;
595}
596
597/**
598 * Try to get @bs's geometry (cyls, heads, sectors).
599 * On success, store them in @geo struct and return 0.
600 * On failure return -errno.
601 * @bs must not be empty.
602 */
603int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
604{
605 BlockDriver *drv = bs->drv;
606
607 if (drv && drv->bdrv_probe_geometry) {
608 return drv->bdrv_probe_geometry(bs, geo);
609 }
610
611 return -ENOTSUP;
612}
613
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        /* /var/tmp survives reboots better than /tmp on some systems */
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    /* mkstemp() replaces the XXXXXX and creates the file */
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* don't leave a half-created file behind */
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}
bellardea2384d2004-08-01 21:59:26 +0000649
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200650/*
651 * Detect host devices. By convention, /dev/cdrom[N] is always
652 * recognized as a host CDROM.
653 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200654static BlockDriver *find_hdev_driver(const char *filename)
655{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200656 int score_max = 0, score;
657 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200658
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100659 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200660 if (d->bdrv_probe_device) {
661 score = d->bdrv_probe_device(filename);
662 if (score > score_max) {
663 score_max = score;
664 drv = d;
665 }
666 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200667 }
668
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200669 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200670}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200671
/* Find the protocol driver for @filename.
 *
 * Host devices are detected first (see the XXX below).  Filenames
 * without a "<protocol>:" prefix — or when @allow_protocol_prefix is
 * false — fall back to the "file" driver.  Otherwise the prefix is
 * matched against each driver's protocol_name; NULL is returned (and
 * @errp set) for an unknown protocol. */
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix,
                                Error **errp)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return &bdrv_file;
    }

    /* copy the protocol prefix (truncated to the buffer size) */
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }

    error_setg(errp, "Unknown protocol '%s'", protocol);
    return NULL;
}
716
Markus Armbrusterc6684242014-11-20 16:27:10 +0100717/*
718 * Guess image format by probing its contents.
719 * This is not a good idea when your image is raw (CVE-2008-2004), but
720 * we do it anyway for backward compatibility.
721 *
722 * @buf contains the image's first @buf_size bytes.
Kevin Wolf7cddd372014-11-20 16:27:11 +0100723 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
724 * but can be smaller if the image file is smaller)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100725 * @filename is its filename.
726 *
727 * For all block drivers, call the bdrv_probe() method to get its
728 * probing score.
729 * Return the first block driver with the highest probing score.
730 */
Kevin Wolf38f3ef52014-11-20 16:27:12 +0100731BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
732 const char *filename)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100733{
734 int score_max = 0, score;
735 BlockDriver *drv = NULL, *d;
736
737 QLIST_FOREACH(d, &bdrv_drivers, list) {
738 if (d->bdrv_probe) {
739 score = d->bdrv_probe(buf, buf_size, filename);
740 if (score > score_max) {
741 score_max = score;
742 drv = d;
743 }
744 }
745 }
746
747 return drv;
748}
749
Kevin Wolff500a6d2012-11-12 17:35:27 +0100750static int find_image_format(BlockDriverState *bs, const char *filename,
Max Reitz34b5d2c2013-09-05 14:45:29 +0200751 BlockDriver **pdrv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000752{
Markus Armbrusterc6684242014-11-20 16:27:10 +0100753 BlockDriver *drv;
Kevin Wolf7cddd372014-11-20 16:27:11 +0100754 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
Kevin Wolff500a6d2012-11-12 17:35:27 +0100755 int ret = 0;
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700756
Kevin Wolf08a00552010-06-01 18:37:31 +0200757 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
Paolo Bonzini8e895592013-01-10 15:39:27 +0100758 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
Max Reitzef810432014-12-02 18:32:42 +0100759 *pdrv = &bdrv_raw;
Stefan Weilc98ac352010-07-21 21:51:51 +0200760 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700761 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700762
bellard83f64092006-08-01 16:21:11 +0000763 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
bellard83f64092006-08-01 16:21:11 +0000764 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200765 error_setg_errno(errp, -ret, "Could not read image for determining its "
766 "format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200767 *pdrv = NULL;
768 return ret;
bellard83f64092006-08-01 16:21:11 +0000769 }
770
Markus Armbrusterc6684242014-11-20 16:27:10 +0100771 drv = bdrv_probe_all(buf, ret, filename);
Stefan Weilc98ac352010-07-21 21:51:51 +0200772 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200773 error_setg(errp, "Could not determine image format: No compatible "
774 "driver found");
Stefan Weilc98ac352010-07-21 21:51:51 +0200775 ret = -ENOENT;
776 }
777 *pdrv = drv;
778 return ret;
bellardea2384d2004-08-01 21:59:26 +0000779}
780
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100781/**
782 * Set the current 'total_sectors' value
Markus Armbruster65a9bb22014-06-26 13:23:17 +0200783 * Return 0 on success, -errno on error.
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100784 */
785static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
786{
787 BlockDriver *drv = bs->drv;
788
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700789 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
790 if (bs->sg)
791 return 0;
792
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100793 /* query actual device if possible, otherwise just trust the hint */
794 if (drv->bdrv_getlength) {
795 int64_t length = drv->bdrv_getlength(bs);
796 if (length < 0) {
797 return length;
798 }
Fam Zheng7e382002013-11-06 19:48:06 +0800799 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100800 }
801
802 bs->total_sectors = hint;
803 return 0;
804}
805
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100806/**
Paolo Bonzini9e8f1832013-02-08 14:06:11 +0100807 * Set open flags for a given discard mode
808 *
809 * Return 0 on success, -1 if the discard mode was invalid.
810 */
811int bdrv_parse_discard_flags(const char *mode, int *flags)
812{
813 *flags &= ~BDRV_O_UNMAP;
814
815 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
816 /* do nothing */
817 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
818 *flags |= BDRV_O_UNMAP;
819 } else {
820 return -1;
821 }
822
823 return 0;
824}
825
826/**
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100827 * Set open flags for a given cache mode
828 *
829 * Return 0 on success, -1 if the cache mode was invalid.
830 */
831int bdrv_parse_cache_flags(const char *mode, int *flags)
832{
833 *flags &= ~BDRV_O_CACHE_MASK;
834
835 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
836 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100837 } else if (!strcmp(mode, "directsync")) {
838 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100839 } else if (!strcmp(mode, "writeback")) {
840 *flags |= BDRV_O_CACHE_WB;
841 } else if (!strcmp(mode, "unsafe")) {
842 *flags |= BDRV_O_CACHE_WB;
843 *flags |= BDRV_O_NO_FLUSH;
844 } else if (!strcmp(mode, "writethrough")) {
845 /* this is the default */
846 } else {
847 return -1;
848 }
849
850 return 0;
851}
852
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000853/**
854 * The copy-on-read flag is actually a reference count so multiple users may
855 * use the feature without worrying about clobbering its previous state.
856 * Copy-on-read stays enabled until all users have called to disable it.
857 */
858void bdrv_enable_copy_on_read(BlockDriverState *bs)
859{
860 bs->copy_on_read++;
861}
862
863void bdrv_disable_copy_on_read(BlockDriverState *bs)
864{
865 assert(bs->copy_on_read > 0);
866 bs->copy_on_read--;
867}
868
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200869/*
Kevin Wolfb1e6fc02014-05-06 12:11:42 +0200870 * Returns the flags that a temporary snapshot should get, based on the
871 * originally requested flags (the originally requested image will have flags
872 * like a backing file)
873 */
874static int bdrv_temp_snapshot_flags(int flags)
875{
876 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
877}
878
879/*
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200880 * Returns the flags that bs->file should get, based on the given flags for
881 * the parent BDS
882 */
883static int bdrv_inherited_flags(int flags)
884{
885 /* Enable protocol handling, disable format probing for bs->file */
886 flags |= BDRV_O_PROTOCOL;
887
888 /* Our block drivers take care to send flushes and respect unmap policy,
889 * so we can enable both unconditionally on lower layers. */
890 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
891
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200892 /* Clear flags that only apply to the top layer */
Kevin Wolf5669b442014-04-11 21:36:45 +0200893 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
Kevin Wolf0b50cc82014-04-11 21:29:52 +0200894
895 return flags;
896}
897
Kevin Wolf317fc442014-04-25 13:27:34 +0200898/*
899 * Returns the flags that bs->backing_hd should get, based on the given flags
900 * for the parent BDS
901 */
902static int bdrv_backing_flags(int flags)
903{
904 /* backing files always opened read-only */
905 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
906
907 /* snapshot=on is handled on the top layer */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200908 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
Kevin Wolf317fc442014-04-25 13:27:34 +0200909
910 return flags;
911}
912
Kevin Wolf7b272452012-11-12 17:05:39 +0100913static int bdrv_open_flags(BlockDriverState *bs, int flags)
914{
915 int open_flags = flags | BDRV_O_CACHE_WB;
916
917 /*
918 * Clear flags that are internal to the block layer before opening the
919 * image.
920 */
Kevin Wolf20cca272014-06-04 14:33:27 +0200921 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
Kevin Wolf7b272452012-11-12 17:05:39 +0100922
923 /*
924 * Snapshots should be writable.
925 */
Kevin Wolf8bfea152014-04-11 19:16:36 +0200926 if (flags & BDRV_O_TEMPORARY) {
Kevin Wolf7b272452012-11-12 17:05:39 +0100927 open_flags |= BDRV_O_RDWR;
928 }
929
930 return open_flags;
931}
932
Kevin Wolf636ea372014-01-24 14:11:52 +0100933static void bdrv_assign_node_name(BlockDriverState *bs,
934 const char *node_name,
935 Error **errp)
Benoît Canet6913c0c2014-01-23 21:31:33 +0100936{
937 if (!node_name) {
Kevin Wolf636ea372014-01-24 14:11:52 +0100938 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100939 }
940
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200941 /* Check for empty string or invalid characters */
Markus Armbrusterf5bebbb2014-09-30 13:59:30 +0200942 if (!id_wellformed(node_name)) {
Kevin Wolf9aebf3b2014-09-25 09:54:02 +0200943 error_setg(errp, "Invalid node name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100944 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100945 }
946
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100947 /* takes care of avoiding namespaces collisions */
Markus Armbruster7f06d472014-10-07 13:59:12 +0200948 if (blk_by_name(node_name)) {
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100949 error_setg(errp, "node-name=%s is conflicting with a device id",
950 node_name);
Kevin Wolf636ea372014-01-24 14:11:52 +0100951 return;
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100952 }
953
Benoît Canet6913c0c2014-01-23 21:31:33 +0100954 /* takes care of avoiding duplicates node names */
955 if (bdrv_find_node(node_name)) {
956 error_setg(errp, "Duplicate node name");
Kevin Wolf636ea372014-01-24 14:11:52 +0100957 return;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100958 }
959
960 /* copy node name into the bs and insert it into the graph list */
961 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
962 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
Benoît Canet6913c0c2014-01-23 21:31:33 +0100963}
964
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs      the (fresh) BlockDriverState being opened
 * @file    the already-opened protocol layer, or NULL when @drv itself is
 *          a protocol driver (drv->bdrv_file_open != NULL)
 * @options QDict of driver options; consumed entries are deleted
 * @flags   BDRV_O_* flags as requested by the caller
 * @drv     the driver to open the image with (must not be NULL)
 *
 * Returns 0 on success, a negative errno value on failure (with *errp set).
 * On failure, bs->file/opaque/drv are reset so the bs can be reused.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    /* The filename comes from the protocol layer if we have one, otherwise
     * from the options QDict (it may legitimately be absent). */
    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    /* Register the node name (if any) before anything else can fail late */
    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    /* Defaults for a freshly opened node; drivers may adjust alignment and
     * friends later via bdrv_refresh_limits(). */
    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        /* Distinguish "read-only only" drivers from fully unlisted ones to
         * give a more helpful message. */
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                   ? "Driver '%s' can only be used for read-only devices"
                   : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* bs->opaque is the driver's private state, sized per driver */
    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        /* Prefer the driver's own error; fall back to a generic one */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    /* Deprecation warning only — encrypted images still open successfully */
    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    /* Undo the partial open so the bs is left in a clean, reusable state.
     * NOTE(review): bs->file is only cleared, not closed — presumably the
     * caller owns @file and cleans it up; confirm against bdrv_open(). */
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
1108
Kevin Wolf5e5c4f62014-05-26 11:45:08 +02001109static QDict *parse_json_filename(const char *filename, Error **errp)
1110{
1111 QObject *options_obj;
1112 QDict *options;
1113 int ret;
1114
1115 ret = strstart(filename, "json:", &filename);
1116 assert(ret);
1117
1118 options_obj = qobject_from_json(filename);
1119 if (!options_obj) {
1120 error_setg(errp, "Could not parse the JSON options");
1121 return NULL;
1122 }
1123
1124 if (qobject_type(options_obj) != QTYPE_QDICT) {
1125 qobject_decref(options_obj);
1126 error_setg(errp, "Invalid JSON object given");
1127 return NULL;
1128 }
1129
1130 options = qobject_to_qdict(options_obj);
1131 qdict_flatten(options);
1132
1133 return options;
1134}
1135
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 *
 * @options   in/out options QDict; gains "filename" (for protocol nodes)
 *            and "driver" entries where they can be determined
 * @pfilename in/out legacy filename; set to NULL when a json: filename
 *            has been converted into options
 * @flags     BDRV_O_* flags (only BDRV_O_PROTOCOL is inspected here)
 * @drv       explicitly requested driver, or NULL
 *
 * Returns 0 on success, a negative errno value on failure (with *errp set).
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            /* Remember to let the driver rewrite the filename into
             * structured options later (bdrv_parse_filename). */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        /* An explicit driver argument and a "driver" option are mutually
         * exclusive; the explicit one is recorded in the QDict. */
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            /* Protocol nodes can infer the driver from the filename
             * (e.g. "nbd://..."); format nodes cannot. */
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    /* For protocol nodes a driver must have been determined by now; for
     * format nodes it may still be probed later. */
    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* The parsed form supersedes the plain filename unless the driver
         * insists on having one. */
        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
1227
/*
 * Install @backing_hd as the backing file of @bs (or detach the current
 * one when @backing_hd is NULL).
 *
 * Manages bs->backing_blocker: the new backing node gets all operations
 * blocked except COMMIT_TARGET, and the blocker is released when the
 * backing file is detached.  Always refreshes bs's limits at the end.
 *
 * NOTE(review): reference counting of @backing_hd (bdrv_ref/bdrv_unref)
 * is presumably the caller's responsibility — confirm against callers.
 */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        /* Replacing an existing backing file: lift its blocker first */
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        /* First backing file: create the blocker error used below */
        error_setg(&bs->backing_blocker,
                   "node is used as backing hd of '%s'",
                   bdrv_get_device_or_node_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        /* Detaching: the blocker is no longer needed */
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}
1258
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 *
 * Returns 0 on success (including the no-op cases: backing file already
 * open, or no backing file configured), a negative errno value on failure.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    /* Already open: nothing to do (still consume the options reference) */
    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        /* The filename comes from the options; no legacy name needed */
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* No backing file configured at all: success, nothing to open */
        QDECREF(options);
        goto free_exit;
    } else {
        /* Resolve the (possibly relative) backing filename against bs */
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
                                       &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            QDECREF(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    /* An explicit "driver" option overrides the recorded backing format */
    if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put(options, "driver", qstring_from_str(bs->backing_format));
    }

    assert(bs->backing_hd == NULL);
    /* bdrv_open() consumes the options reference from here on */
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), NULL, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}
1333
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001334/*
Max Reitzda557aa2013-12-20 19:28:11 +01001335 * Opens a disk image whose options are given as BlockdevRef in another block
1336 * device's options.
1337 *
Max Reitzda557aa2013-12-20 19:28:11 +01001338 * If allow_none is true, no image will be opened if filename is false and no
1339 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1340 *
1341 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1342 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1343 * itself, all options starting with "${bdref_key}." are considered part of the
1344 * BlockdevRef.
1345 *
1346 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001347 *
1348 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001349 */
1350int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1351 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001352 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001353{
1354 QDict *image_options;
1355 int ret;
1356 char *bdref_key_dot;
1357 const char *reference;
1358
Max Reitzf67503e2014-02-18 18:33:05 +01001359 assert(pbs);
1360 assert(*pbs == NULL);
1361
Max Reitzda557aa2013-12-20 19:28:11 +01001362 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1363 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1364 g_free(bdref_key_dot);
1365
1366 reference = qdict_get_try_str(options, bdref_key);
1367 if (!filename && !reference && !qdict_size(image_options)) {
1368 if (allow_none) {
1369 ret = 0;
1370 } else {
1371 error_setg(errp, "A block device must be specified for \"%s\"",
1372 bdref_key);
1373 ret = -EINVAL;
1374 }
Markus Armbrusterb20e61e2014-05-28 11:16:57 +02001375 QDECREF(image_options);
Max Reitzda557aa2013-12-20 19:28:11 +01001376 goto done;
1377 }
1378
Max Reitzf7d9fd82014-02-18 18:33:12 +01001379 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001380
1381done:
1382 qdict_del(options, bdref_key);
1383 return ret;
1384}
1385
Chen Gang6b8aeca2014-06-23 23:28:23 +08001386int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001387{
1388 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001389 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001390 int64_t total_size;
Chunyan Liu83d05212014-06-05 17:20:51 +08001391 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001392 QDict *snapshot_options;
1393 BlockDriverState *bs_snapshot;
1394 Error *local_err;
1395 int ret;
1396
1397 /* if snapshot, we create a temporary backing file and open it
1398 instead of opening 'filename' directly */
1399
1400 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001401 total_size = bdrv_getlength(bs);
1402 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001403 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001404 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001405 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001406 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001407
1408 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001409 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001410 if (ret < 0) {
1411 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001412 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001413 }
1414
Max Reitzef810432014-12-02 18:32:42 +01001415 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001416 &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01001417 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
Max Reitzef810432014-12-02 18:32:42 +01001418 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001419 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001420 if (ret < 0) {
1421 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1422 "'%s': %s", tmp_filename,
1423 error_get_pretty(local_err));
1424 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001425 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001426 }
1427
1428 /* Prepare a new options QDict for the temporary file */
1429 snapshot_options = qdict_new();
1430 qdict_put(snapshot_options, "file.driver",
1431 qstring_from_str("file"));
1432 qdict_put(snapshot_options, "file.filename",
1433 qstring_from_str(tmp_filename));
1434
Markus Armbrustere4e99862014-10-07 13:59:03 +02001435 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001436
1437 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Max Reitzef810432014-12-02 18:32:42 +01001438 flags, &bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001439 if (ret < 0) {
1440 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001441 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001442 }
1443
1444 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001445
1446out:
1447 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001448 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001449}
1450
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
 *
 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
 * If it is not NULL, the referenced BDS will be reused.
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given,
 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
 *
 * Returns 0 on success and stores the opened BDS in *pbs; returns a negative
 * errno on failure and sets errp.  There are two distinct failure paths:
 * 'fail' for errors before the image was successfully opened, and
 * 'close_and_fail' for errors afterwards, where the BDS must be closed again.
 */
int bdrv_open(BlockDriverState **pbs, const char *filename,
              const char *reference, QDict *options, int flags,
              BlockDriver *drv, Error **errp)
{
    int ret;
    BlockDriverState *file = NULL, *bs;
    const char *drvname;
    Error *local_err = NULL;
    int snapshot_flags = 0;

    assert(pbs);

    /* Referencing an existing node: just take a new reference on it.  The
     * options QDict is still consumed (dropped), per the contract above. */
    if (reference) {
        bool options_non_empty = options ? qdict_size(options) : false;
        QDECREF(options);

        if (*pbs) {
            error_setg(errp, "Cannot reuse an existing BDS when referencing "
                       "another block device");
            return -EINVAL;
        }

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

    /* Reuse the caller's BDS if given, otherwise create a fresh one */
    if (*pbs) {
        bs = *pbs;
    } else {
        bs = bdrv_new();
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
    if (local_err) {
        goto fail;
    }

    /* Find the right image format driver */
    drv = NULL;
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        qdict_del(options, "driver");
        if (!drv) {
            error_setg(errp, "Unknown driver: '%s'", drvname);
            ret = -EINVAL;
            goto fail;
        }
    }

    assert(drvname || !(flags & BDRV_O_PROTOCOL));
    if (drv && !drv->bdrv_file_open) {
        /* If the user explicitly wants a format driver here, we'll need to add
         * another layer for the protocol in bs->file */
        flags &= ~BDRV_O_PROTOCOL;
    }

    /* bs->options keeps the caller-visible set; the shallow clone is what
     * gets consumed (entries deleted as they are processed) below. */
    bs->options = options;
    options = qdict_clone_shallow(options);

    /* Open image file without format layer */
    if ((flags & BDRV_O_PROTOCOL) == 0) {
        if (flags & BDRV_O_RDWR) {
            flags |= BDRV_O_ALLOW_RDWR;
        }
        if (flags & BDRV_O_SNAPSHOT) {
            /* Remember the flags for the temporary overlay; the image itself
             * is opened with backing-file semantics. */
            snapshot_flags = bdrv_temp_snapshot_flags(flags);
            flags = bdrv_backing_flags(flags);
        }

        assert(file == NULL);
        ret = bdrv_open_image(&file, filename, options, "file",
                              bdrv_inherited_flags(flags),
                              true, &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Image format probing */
    bs->probed = !drv;
    if (!drv && file) {
        ret = find_image_format(file, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        ret = -EINVAL;
        goto fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* bdrv_open_common() may have taken its own reference to 'file'
     * (as bs->file); drop ours if it chose a different one. */
    if (file && (bs->file != file)) {
        bdrv_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        QDict *backing_options;

        qdict_extract_subqdict(options, &backing_options, "backing.");
        ret = bdrv_open_backing_file(bs, backing_options, &local_err);
        if (ret < 0) {
            goto close_and_fail;
        }
    }

    bdrv_refresh_filename(bs);

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
        if (local_err) {
            goto close_and_fail;
        }
    }

    /* Check if any unknown options were used */
    if (options && (qdict_size(options) != 0)) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp, "Block format '%s' used by device '%s' doesn't "
                       "support the option '%s'", drv->format_name,
                       bdrv_get_device_name(bs), entry->key);
        }

        ret = -EINVAL;
        goto close_and_fail;
    }

    /* Encrypted images need a key before they can be used; only signal the
     * media-change event when the image is actually usable. */
    if (!bdrv_key_required(bs)) {
        if (bs->blk) {
            blk_dev_change_media_cb(bs->blk, true);
        }
    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
               && !runstate_check(RUN_STATE_INMIGRATE)
               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
        error_setg(errp,
                   "Guest must be stopped for opening of encrypted image");
        ret = -EBUSY;
        goto close_and_fail;
    }

    QDECREF(options);
    *pbs = bs;
    return 0;

fail:
    if (file != NULL) {
        bdrv_unref(file);
    }
    QDECREF(bs->options);
    QDECREF(options);
    bs->options = NULL;
    if (!*pbs) {
        /* If *pbs is NULL, a new BDS has been created in this function and
           needs to be freed now. Otherwise, it does not need to be closed,
           since it has not really been opened yet. */
        bdrv_unref(bs);
    }
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;

close_and_fail:
    /* See fail path, but now the BDS has to be always closed */
    if (*pbs) {
        bdrv_close(bs);
    } else {
        bdrv_unref(bs);
    }
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}
1671
/* One element of a BlockReopenQueue: the staged reopen state for a single
 * BlockDriverState, plus a flag recording whether its prepare phase has run
 * (so that only prepared entries are aborted on rollback). */
typedef struct BlockReopenQueueEntry {
    bool prepared;                               /* prepare() succeeded? */
    BDRVReopenState state;                       /* staged new state */
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; /* queue linkage */
} BlockReopenQueueEntry;
1677
1678/*
1679 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1680 * reopen of multiple devices.
1681 *
1682 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1683 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1684 * be created and initialized. This newly created BlockReopenQueue should be
1685 * passed back in for subsequent calls that are intended to be of the same
1686 * atomic 'set'.
1687 *
1688 * bs is the BlockDriverState to add to the reopen queue.
1689 *
1690 * flags contains the open flags for the associated bs
1691 *
1692 * returns a pointer to bs_queue, which is either the newly allocated
1693 * bs_queue, or the existing bs_queue being used.
1694 *
1695 */
1696BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1697 BlockDriverState *bs, int flags)
1698{
1699 assert(bs != NULL);
1700
1701 BlockReopenQueueEntry *bs_entry;
1702 if (bs_queue == NULL) {
1703 bs_queue = g_new0(BlockReopenQueue, 1);
1704 QSIMPLEQ_INIT(bs_queue);
1705 }
1706
Kevin Wolff1f25a22014-04-25 19:04:55 +02001707 /* bdrv_open() masks this flag out */
1708 flags &= ~BDRV_O_PROTOCOL;
1709
Jeff Codye971aa12012-09-20 15:13:19 -04001710 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001711 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001712 }
1713
1714 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1715 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1716
1717 bs_entry->state.bs = bs;
1718 bs_entry->state.flags = flags;
1719
1720 return bs_queue;
1721}
1722
1723/*
1724 * Reopen multiple BlockDriverStates atomically & transactionally.
1725 *
1726 * The queue passed in (bs_queue) must have been built up previous
1727 * via bdrv_reopen_queue().
1728 *
1729 * Reopens all BDS specified in the queue, with the appropriate
1730 * flags. All devices are prepared for reopen, and failure of any
1731 * device will cause all device changes to be abandonded, and intermediate
1732 * data cleaned up.
1733 *
1734 * If all devices prepare successfully, then the changes are committed
1735 * to all devices.
1736 *
1737 */
1738int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1739{
1740 int ret = -1;
1741 BlockReopenQueueEntry *bs_entry, *next;
1742 Error *local_err = NULL;
1743
1744 assert(bs_queue != NULL);
1745
1746 bdrv_drain_all();
1747
1748 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1749 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1750 error_propagate(errp, local_err);
1751 goto cleanup;
1752 }
1753 bs_entry->prepared = true;
1754 }
1755
1756 /* If we reach this point, we have success and just need to apply the
1757 * changes
1758 */
1759 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1760 bdrv_reopen_commit(&bs_entry->state);
1761 }
1762
1763 ret = 0;
1764
1765cleanup:
1766 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1767 if (ret && bs_entry->prepared) {
1768 bdrv_reopen_abort(&bs_entry->state);
1769 }
1770 g_free(bs_entry);
1771 }
1772 g_free(bs_queue);
1773 return ret;
1774}
1775
1776
1777/* Reopen a single BlockDriverState with the specified flags. */
1778int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1779{
1780 int ret = -1;
1781 Error *local_err = NULL;
1782 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1783
1784 ret = bdrv_reopen_multiple(queue, &local_err);
1785 if (local_err != NULL) {
1786 error_propagate(errp, local_err);
1787 }
1788 return ret;
1789}
1790
1791
1792/*
1793 * Prepares a BlockDriverState for reopen. All changes are staged in the
1794 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1795 * the block driver layer .bdrv_reopen_prepare()
1796 *
1797 * bs is the BlockDriverState to reopen
1798 * flags are the new open flags
1799 * queue is the reopen queue
1800 *
1801 * Returns 0 on success, non-zero on error. On error errp will be set
1802 * as well.
1803 *
1804 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1805 * It is the responsibility of the caller to then call the abort() or
1806 * commit() for any other BDS that have been left in a prepare() state
1807 *
1808 */
1809int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1810 Error **errp)
1811{
1812 int ret = -1;
1813 Error *local_err = NULL;
1814 BlockDriver *drv;
1815
1816 assert(reopen_state != NULL);
1817 assert(reopen_state->bs->drv != NULL);
1818 drv = reopen_state->bs->drv;
1819
1820 /* if we are to stay read-only, do not allow permission change
1821 * to r/w */
1822 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1823 reopen_state->flags & BDRV_O_RDWR) {
Alberto Garcia81e5f782015-04-08 12:29:19 +03001824 error_setg(errp, "Node '%s' is read only",
1825 bdrv_get_device_or_node_name(reopen_state->bs));
Jeff Codye971aa12012-09-20 15:13:19 -04001826 goto error;
1827 }
1828
1829
1830 ret = bdrv_flush(reopen_state->bs);
1831 if (ret) {
1832 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1833 strerror(-ret));
1834 goto error;
1835 }
1836
1837 if (drv->bdrv_reopen_prepare) {
1838 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1839 if (ret) {
1840 if (local_err != NULL) {
1841 error_propagate(errp, local_err);
1842 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001843 error_setg(errp, "failed while preparing to reopen image '%s'",
1844 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001845 }
1846 goto error;
1847 }
1848 } else {
1849 /* It is currently mandatory to have a bdrv_reopen_prepare()
1850 * handler for each supported drv. */
Alberto Garcia81e5f782015-04-08 12:29:19 +03001851 error_setg(errp, "Block format '%s' used by node '%s' "
1852 "does not support reopening files", drv->format_name,
1853 bdrv_get_device_or_node_name(reopen_state->bs));
Jeff Codye971aa12012-09-20 15:13:19 -04001854 ret = -1;
1855 goto error;
1856 }
1857
1858 ret = 0;
1859
1860error:
1861 return ret;
1862}
1863
1864/*
1865 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1866 * makes them final by swapping the staging BlockDriverState contents into
1867 * the active BlockDriverState contents.
1868 */
1869void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1870{
1871 BlockDriver *drv;
1872
1873 assert(reopen_state != NULL);
1874 drv = reopen_state->bs->drv;
1875 assert(drv != NULL);
1876
1877 /* If there are any driver level actions to take */
1878 if (drv->bdrv_reopen_commit) {
1879 drv->bdrv_reopen_commit(reopen_state);
1880 }
1881
1882 /* set BDS specific flags now */
1883 reopen_state->bs->open_flags = reopen_state->flags;
1884 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1885 BDRV_O_CACHE_WB);
1886 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001887
Kevin Wolf3baca892014-07-16 17:48:16 +02001888 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001889}
1890
1891/*
1892 * Abort the reopen, and delete and free the staged changes in
1893 * reopen_state
1894 */
1895void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1896{
1897 BlockDriver *drv;
1898
1899 assert(reopen_state != NULL);
1900 drv = reopen_state->bs->drv;
1901 assert(drv != NULL);
1902
1903 if (drv->bdrv_reopen_abort) {
1904 drv->bdrv_reopen_abort(reopen_state);
1905 }
1906}
1907
1908
/* Close a BlockDriverState: cancel its job, quiesce and flush all I/O,
 * tear down the driver state and the whole backing chain below it, and
 * reset the BDS fields to their pristine values.  The BDS itself stays
 * allocated and can be reopened. */
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        /* Detach and release the backing file first, before the driver's
         * own close callback runs. */
        if (bs->backing_hd) {
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        /* Reset per-image state so a later reopen starts from scratch */
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;

        /* Drop the protocol layer as well */
        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    /* Tell the attached device model (if any) that the medium is gone */
    if (bs->blk) {
        blk_dev_change_media_cb(bs->blk, false);
    }

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }

    /* Free any registered AioContext change notifiers */
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}
1964
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001965void bdrv_close_all(void)
1966{
1967 BlockDriverState *bs;
1968
Benoît Canetdc364f42014-01-23 21:31:32 +01001969 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001970 AioContext *aio_context = bdrv_get_aio_context(bs);
1971
1972 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001973 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001974 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001975 }
1976}
1977
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001978/* Check if any requests are in-flight (including throttled requests) */
1979static bool bdrv_requests_pending(BlockDriverState *bs)
1980{
1981 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1982 return true;
1983 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001984 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1985 return true;
1986 }
1987 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001988 return true;
1989 }
1990 if (bs->file && bdrv_requests_pending(bs->file)) {
1991 return true;
1992 }
1993 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1994 return true;
1995 }
1996 return false;
1997}
1998
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01001999static bool bdrv_drain_one(BlockDriverState *bs)
2000{
2001 bool bs_busy;
2002
2003 bdrv_flush_io_queue(bs);
2004 bdrv_start_throttled_reqs(bs);
2005 bs_busy = bdrv_requests_pending(bs);
2006 bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
2007 return bs_busy;
2008}
2009
2010/*
2011 * Wait for pending requests to complete on a single BlockDriverState subtree
2012 *
2013 * See the warning in bdrv_drain_all(). This function can only be called if
2014 * you are sure nothing can generate I/O because you have op blockers
2015 * installed.
2016 *
2017 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
2018 * AioContext.
2019 */
2020void bdrv_drain(BlockDriverState *bs)
2021{
2022 while (bdrv_drain_one(bs)) {
2023 /* Keep iterating */
2024 }
2025}
2026
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002027/*
2028 * Wait for pending requests to complete across all BlockDriverStates
2029 *
2030 * This function does not flush data to disk, use bdrv_flush_all() for that
2031 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02002032 *
2033 * Note that completion of an asynchronous I/O operation can trigger any
2034 * number of other I/O operations on other devices---for example a coroutine
2035 * can be arbitrarily complex and a constant flow of I/O can come until the
2036 * coroutine is complete. Because of this, it is not possible to have a
2037 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002038 */
2039void bdrv_drain_all(void)
2040{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02002041 /* Always run first iteration so any pending completion BHs run */
2042 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002043 BlockDriverState *bs;
2044
Fam Zheng69da3b02015-04-03 22:05:19 +08002045 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2046 AioContext *aio_context = bdrv_get_aio_context(bs);
2047
2048 aio_context_acquire(aio_context);
2049 if (bs->job) {
2050 block_job_pause(bs->job);
2051 }
2052 aio_context_release(aio_context);
2053 }
2054
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02002055 while (busy) {
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002056 busy = false;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002057
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002058 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2059 AioContext *aio_context = bdrv_get_aio_context(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002060
2061 aio_context_acquire(aio_context);
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01002062 busy |= bdrv_drain_one(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002063 aio_context_release(aio_context);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002064 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002065 }
Fam Zheng69da3b02015-04-03 22:05:19 +08002066
2067 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2068 AioContext *aio_context = bdrv_get_aio_context(bs);
2069
2070 aio_context_acquire(aio_context);
2071 if (bs->job) {
2072 block_job_resume(bs->job);
2073 }
2074 aio_context_release(aio_context);
2075 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002076}
2077
Benoît Canetdc364f42014-01-23 21:31:32 +01002078/* make a BlockDriverState anonymous by removing from bdrv_state and
2079 * graph_bdrv_state list.
Ryan Harperd22b2f42011-03-29 20:51:47 -05002080 Also, NULL terminate the device_name to prevent double remove */
2081void bdrv_make_anon(BlockDriverState *bs)
2082{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002083 /*
2084 * Take care to remove bs from bdrv_states only when it's actually
2085 * in it. Note that bs->device_list.tqe_prev is initially null,
2086 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
2087 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2088 * resetting it to null on remove.
2089 */
2090 if (bs->device_list.tqe_prev) {
Benoît Canetdc364f42014-01-23 21:31:32 +01002091 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002092 bs->device_list.tqe_prev = NULL;
Ryan Harperd22b2f42011-03-29 20:51:47 -05002093 }
Benoît Canetdc364f42014-01-23 21:31:32 +01002094 if (bs->node_name[0] != '\0') {
2095 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2096 }
2097 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05002098}
2099
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02002100static void bdrv_rebind(BlockDriverState *bs)
2101{
2102 if (bs->drv && bs->drv->bdrv_rebind) {
2103 bs->drv->bdrv_rebind(bs);
2104 }
2105}
2106
/* Copy from bs_src to bs_dest the fields that must stay attached to the
 * guest device rather than follow the image contents.  Used by bdrv_swap()
 * to undo the effect of the whole-struct swap on these fields. */
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->guest_block_size = bs_src->guest_block_size;
    bs_dest->copy_on_read = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o throttled req */
    memcpy(&bs_dest->throttle_state,
           &bs_src->throttle_state,
           sizeof(ThrottleState));
    bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
    bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
    bs_dest->io_limits_enabled = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error = bs_src->on_read_error;
    bs_dest->on_write_error = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
    bs_dest->iostatus = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;

    /* reference count */
    bs_dest->refcnt = bs_src->refcnt;

    /* job */
    bs_dest->job = bs_src->job;

    /* keep the same entry in bdrv_states */
    bs_dest->device_list = bs_src->device_list;
    bs_dest->blk = bs_src->blk;

    /* op blockers stay with the device as well */
    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
           sizeof(bs_dest->op_blockers));
}
2150
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new must not be attached to a BlockBackend.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be unattached and shouldn't have anything fancy enabled */
    assert(!bs_new->blk);
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* Swap the two structs wholesale... */
    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new must remain unattached */
    assert(!bs_new->blk);

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    /* Let the drivers fix up any pointers into their own structs */
    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}
2213
Jeff Cody8802d1f2012-02-28 15:54:06 -05002214/*
2215 * Add new bs contents at the top of an image chain while the chain is
2216 * live, while keeping required fields on the top layer.
2217 *
2218 * This will modify the BlockDriverState fields, and swap contents
2219 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2220 *
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002221 * bs_new must not be attached to a BlockBackend.
Jeff Codyf6801b82012-03-27 16:30:19 -04002222 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05002223 * This function does not create any image files.
2224 */
2225void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2226{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002227 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002228
2229 /* The contents of 'tmp' will become bs_top, as we are
2230 * swapping bs_new and bs_top contents. */
Fam Zheng8d24cce2014-05-23 21:29:45 +08002231 bdrv_set_backing_hd(bs_top, bs_new);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002232}
2233
/*
 * Free a BlockDriverState.
 *
 * Preconditions (enforced by the asserts below): no block job is attached,
 * no operation blockers remain, the reference count has dropped to zero,
 * and all dirty bitmaps have been released.  Closes the image, removes the
 * BDS from the device list via bdrv_make_anon(), and frees the memory.
 */
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    g_free(bs);
}
2248
aliguorie97fc192009-04-21 23:11:50 +00002249/*
2250 * Run consistency checks on an image
2251 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002252 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002253 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002254 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002255 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002256int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002257{
Max Reitz908bcd52014-08-07 22:47:55 +02002258 if (bs->drv == NULL) {
2259 return -ENOMEDIUM;
2260 }
aliguorie97fc192009-04-21 23:11:50 +00002261 if (bs->drv->bdrv_check == NULL) {
2262 return -ENOTSUP;
2263 }
2264
Kevin Wolfe076f332010-06-29 11:43:13 +02002265 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002266 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002267}
2268
/* Number of sectors copied per iteration of the commit loop below. */
#define COMMIT_BUF_SECTORS 2048

/*
 * Commit COW file into the raw image: copy every allocated sector of bs
 * into its backing file (bs->backing_hd), growing the backing file first
 * if it is smaller than bs.
 *
 * If the backing file is read-only it is temporarily reopened read-write
 * and restored on exit.  On success the top image is emptied via the
 * driver's bdrv_make_empty callback (when available) and both images are
 * flushed.
 *
 * Returns 0 on success, -ENOMEDIUM without a driver, -ENOTSUP without a
 * backing file, -EBUSY when commit is op-blocked, or another -errno from
 * the underlying I/O.
 */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
        return -EBUSY;
    }

    /* Remember the backing file's current mode so it can be restored in
     * ro_cleanup if we have to reopen it writable. */
    ro = bs->backing_hd->read_only;
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing_hd);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible. If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing_hd, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;

    /* qemu_try_blockalign() for bs will choose an alignment that works for
     * bs->backing_hd as well, so no need to compare the alignment manually. */
    buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

    /* Copy only ranges that are allocated in the top image; unallocated
     * ranges already read through to the backing file. */
    for (sector = 0; sector < total_sectors; sector += n) {
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
            ret = bdrv_read(bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing_hd, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd) {
        bdrv_flush(bs->backing_hd);
    }

    ret = 0;
ro_cleanup:
    qemu_vfree(buf);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}
2378
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002379int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002380{
2381 BlockDriverState *bs;
2382
Benoît Canetdc364f42014-01-23 21:31:32 +01002383 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002384 AioContext *aio_context = bdrv_get_aio_context(bs);
2385
2386 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002387 if (bs->drv && bs->backing_hd) {
2388 int ret = bdrv_commit(bs);
2389 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002390 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002391 return ret;
2392 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002393 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002394 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002395 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002396 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002397}
2398
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002399/**
2400 * Remove an active request from the tracked requests list
2401 *
2402 * This function should be called when a tracked request is completing.
2403 */
2404static void tracked_request_end(BdrvTrackedRequest *req)
2405{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002406 if (req->serialising) {
2407 req->bs->serialising_in_flight--;
2408 }
2409
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002410 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002411 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002412}
2413
2414/**
2415 * Add an active request to the tracked requests list
2416 */
2417static void tracked_request_begin(BdrvTrackedRequest *req,
2418 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002419 int64_t offset,
2420 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002421{
2422 *req = (BdrvTrackedRequest){
2423 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002424 .offset = offset,
2425 .bytes = bytes,
2426 .is_write = is_write,
2427 .co = qemu_coroutine_self(),
2428 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002429 .overlap_offset = offset,
2430 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002431 };
2432
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002433 qemu_co_queue_init(&req->wait_queue);
2434
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002435 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2436}
2437
Kevin Wolfe96126f2014-02-08 10:42:18 +01002438static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002439{
Kevin Wolf73271452013-12-04 17:08:50 +01002440 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002441 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2442 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002443
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002444 if (!req->serialising) {
2445 req->bs->serialising_in_flight++;
2446 req->serialising = true;
2447 }
Kevin Wolf73271452013-12-04 17:08:50 +01002448
2449 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2450 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002451}
2452
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002453/**
2454 * Round a region to cluster boundaries
2455 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002456void bdrv_round_to_clusters(BlockDriverState *bs,
2457 int64_t sector_num, int nb_sectors,
2458 int64_t *cluster_sector_num,
2459 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002460{
2461 BlockDriverInfo bdi;
2462
2463 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2464 *cluster_sector_num = sector_num;
2465 *cluster_nb_sectors = nb_sectors;
2466 } else {
2467 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2468 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2469 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2470 nb_sectors, c);
2471 }
2472}
2473
Kevin Wolf73271452013-12-04 17:08:50 +01002474static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002475{
2476 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002477 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002478
Kevin Wolf73271452013-12-04 17:08:50 +01002479 ret = bdrv_get_info(bs, &bdi);
2480 if (ret < 0 || bdi.cluster_size == 0) {
2481 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002482 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002483 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002484 }
2485}
2486
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002487static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002488 int64_t offset, unsigned int bytes)
2489{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002490 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002491 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002492 return false;
2493 }
2494 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002495 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002496 return false;
2497 }
2498 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002499}
2500
/*
 * Block until no other tracked request overlaps self's serialising window.
 *
 * Scans bs->tracked_requests repeatedly; whenever an overlapping request
 * is found that is not already (directly or indirectly) waiting on us,
 * this coroutine queues itself on that request's wait queue and rescans
 * once woken.  Returns true iff we actually had to wait at least once.
 */
static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
    bool retry;
    bool waited = false;

    /* Fast path: nothing serialising in flight, no conflict possible. */
    if (!bs->serialising_in_flight) {
        return false;
    }

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            /* Only pairs where at least one side is serialising matter. */
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
            }
            if (tracked_request_overlaps(req, self->overlap_offset,
                                         self->overlap_bytes))
            {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
                    qemu_co_queue_wait(&req->wait_queue);
                    self->waiting_for = NULL;
                    retry = true;
                    waited = true;
                    break;
                }
            }
        }
    } while (retry);

    return waited;
}
2544
Kevin Wolf756e6732010-01-12 12:55:17 +01002545/*
2546 * Return values:
2547 * 0 - success
2548 * -EINVAL - backing format specified, but no file
2549 * -ENOSPC - can't update the backing file because no space is left in the
2550 * image file header
2551 * -ENOTSUP - format driver doesn't support changing the backing file
2552 */
2553int bdrv_change_backing_file(BlockDriverState *bs,
2554 const char *backing_file, const char *backing_fmt)
2555{
2556 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002557 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002558
Paolo Bonzini5f377792012-04-12 14:01:01 +02002559 /* Backing file format doesn't make sense without a backing file */
2560 if (backing_fmt && !backing_file) {
2561 return -EINVAL;
2562 }
2563
Kevin Wolf756e6732010-01-12 12:55:17 +01002564 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002565 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002566 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002567 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002568 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002569
2570 if (ret == 0) {
2571 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2572 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2573 }
2574 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002575}
2576
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002577/*
2578 * Finds the image layer in the chain that has 'bs' as its backing file.
2579 *
2580 * active is the current topmost image.
2581 *
2582 * Returns NULL if bs is not found in active's image chain,
2583 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002584 *
2585 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002586 */
2587BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2588 BlockDriverState *bs)
2589{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002590 while (active && bs != active->backing_hd) {
2591 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002592 }
2593
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002594 return active;
2595}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002596
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002597/* Given a BDS, searches for the base layer. */
2598BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2599{
2600 return bdrv_find_overlay(bs, NULL);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002601}
2602
/* Queue element used by bdrv_drop_intermediate() to remember each
 * intermediate image scheduled for deletion. */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;                       /* image to be dropped */
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; /* link in states_to_delete */
} BlkIntermediateStates;
2607
2608
/*
 * Drops images above 'base' up to and including 'top', and sets the image
 * above 'top' to have base as its backing file.
 *
 * Requires that the overlay to 'top' is opened r/w, so that the backing file
 * information in 'bs' can be properly updated.
 *
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * If backing_file_str is non-NULL, it will be used when modifying top's
 * overlay image metadata.
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 * Returns 0 on success, -EIO on any failure (missing drivers, top not in
 * active's chain, base not reachable below top) or the error from
 * bdrv_change_backing_file().
 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base, const char *backing_file_str)
{
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
    BlkIntermediateStates *intermediate_state, *next;
    int ret = -EIO;

    /* Queue of images between top and base (inclusive of top) that will
     * be unlinked and unreferenced on success. */
    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
    QSIMPLEQ_INIT(&states_to_delete);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    new_top_bs = bdrv_find_overlay(active, top);

    if (new_top_bs == NULL) {
        /* we could not find the image above 'top', this is an error */
        goto exit;
    }

    /* special case of new_top_bs->backing_hd already pointing to base - nothing
     * to do, no intermediate images */
    if (new_top_bs->backing_hd == base) {
        ret = 0;
        goto exit;
    }

    intermediate = top;

    /* now we will go down through the list, and add each BDS we find
     * into our deletion queue, until we hit the 'base'
     */
    while (intermediate) {
        intermediate_state = g_new0(BlkIntermediateStates, 1);
        intermediate_state->bs = intermediate;
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);

        if (intermediate->backing_hd == base) {
            base_bs = intermediate->backing_hd;
            break;
        }
        intermediate = intermediate->backing_hd;
    }
    if (base_bs == NULL) {
        /* something went wrong, we did not end at the base. safely
         * unravel everything, and exit with error */
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
    backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
    ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        goto exit;
    }
    bdrv_set_backing_hd(new_top_bs, base_bs);

    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        /* so that bdrv_close() does not recursively close the chain */
        bdrv_set_backing_hd(intermediate_state->bs, NULL);
        bdrv_unref(intermediate_state->bs);
    }
    ret = 0;

exit:
    /* Free the bookkeeping nodes on both the success and failure paths. */
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        g_free(intermediate_state);
    }
    return ret;
}
2712
2713
aliguori71d07702009-03-03 17:37:16 +00002714static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2715 size_t size)
2716{
Peter Lieven75af1f32015-02-06 11:54:11 +01002717 if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002718 return -EIO;
2719 }
2720
Max Reitzc0191e72015-02-05 13:58:24 -05002721 if (!bdrv_is_inserted(bs)) {
aliguori71d07702009-03-03 17:37:16 +00002722 return -ENOMEDIUM;
Max Reitzc0191e72015-02-05 13:58:24 -05002723 }
aliguori71d07702009-03-03 17:37:16 +00002724
Max Reitzc0191e72015-02-05 13:58:24 -05002725 if (offset < 0) {
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002726 return -EIO;
Max Reitzc0191e72015-02-05 13:58:24 -05002727 }
aliguori71d07702009-03-03 17:37:16 +00002728
2729 return 0;
2730}
2731
2732static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2733 int nb_sectors)
2734{
Peter Lieven75af1f32015-02-06 11:54:11 +01002735 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002736 return -EIO;
2737 }
2738
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002739 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2740 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002741}
2742
/*
 * Parameter/result bundle handed to bdrv_rw_co_entry() when a synchronous
 * request is run inside a coroutine (see bdrv_prwv_co()).
 */
typedef struct RwCo {
    BlockDriverState *bs;   /* device to access */
    int64_t offset;         /* request offset in bytes */
    QEMUIOVector *qiov;     /* data vector; qiov->size is the byte count */
    bool is_write;          /* selects pwritev vs preadv path */
    int ret;                /* completion status; stays NOT_DONE until done */
    BdrvRequestFlags flags; /* forwarded to bdrv_co_do_p{read,write}v */
} RwCo;
2751
2752static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2753{
2754 RwCo *rwco = opaque;
2755
2756 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002757 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2758 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002759 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002760 } else {
2761 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2762 rwco->qiov->size, rwco->qiov,
2763 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002764 }
2765}
2766
/*
 * Process a vectored synchronous request using coroutines.
 *
 * If called from coroutine context the request runs inline; otherwise a
 * coroutine is spawned and the caller's AioContext is polled until the
 * request completes (rwco.ret leaves NOT_DONE).  Returns the request's
 * result (0/positive on success, -errno on failure).
 */
static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
        .flags = flags,
    };

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }
    return rwco.ret;
}
2809
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002810/*
2811 * Process a synchronous request using coroutines
2812 */
2813static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002814 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002815{
2816 QEMUIOVector qiov;
2817 struct iovec iov = {
2818 .iov_base = (void *)buf,
2819 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2820 };
2821
Peter Lieven75af1f32015-02-06 11:54:11 +01002822 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolfda15ee52014-04-14 15:39:36 +02002823 return -EINVAL;
2824 }
2825
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002826 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002827 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2828 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002829}
2830
bellard19cb3732006-08-19 11:45:59 +00002831/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002832int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002833 uint8_t *buf, int nb_sectors)
2834{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002835 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002836}
2837
Markus Armbruster07d27a42012-06-29 17:34:29 +02002838/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2839int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2840 uint8_t *buf, int nb_sectors)
2841{
2842 bool enabled;
2843 int ret;
2844
2845 enabled = bs->io_limits_enabled;
2846 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002847 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002848 bs->io_limits_enabled = enabled;
2849 return ret;
2850}
2851
ths5fafdf22007-09-16 21:08:06 +00002852/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002853 -EIO generic I/O error (may happen for all errors)
2854 -ENOMEDIUM No media inserted.
2855 -EINVAL Invalid sector number or nb_sectors
2856 -EACCES Trying to write a read-only device
2857*/
ths5fafdf22007-09-16 21:08:06 +00002858int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002859 const uint8_t *buf, int nb_sectors)
2860{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002861 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002862}
2863
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002864int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2865 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002866{
2867 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002868 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002869}
2870
/*
 * Completely zero out a block device with the help of bdrv_write_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
 *
 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
 */
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
    int64_t target_sectors, ret, nb_sectors, sector_num = 0;
    int n;

    target_sectors = bdrv_nb_sectors(bs);
    if (target_sectors < 0) {
        return target_sectors;
    }

    for (;;) {
        /* Process at most BDRV_REQUEST_MAX_SECTORS per iteration. */
        nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
        if (nb_sectors <= 0) {
            return 0;
        }
        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
        if (ret < 0) {
            error_report("error getting block status at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        /* Skip ranges that already read back as zeroes. */
        if (ret & BDRV_BLOCK_ZERO) {
            sector_num += n;
            continue;
        }
        ret = bdrv_write_zeroes(bs, sector_num, n, flags);
        if (ret < 0) {
            error_report("error writing zeroes at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        sector_num += n;
    }
}
2913
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002914int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002915{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002916 QEMUIOVector qiov;
2917 struct iovec iov = {
2918 .iov_base = (void *)buf,
2919 .iov_len = bytes,
2920 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002921 int ret;
bellard83f64092006-08-01 16:21:11 +00002922
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002923 if (bytes < 0) {
2924 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002925 }
2926
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002927 qemu_iovec_init_external(&qiov, &iov, 1);
2928 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2929 if (ret < 0) {
2930 return ret;
bellard83f64092006-08-01 16:21:11 +00002931 }
2932
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002933 return bytes;
bellard83f64092006-08-01 16:21:11 +00002934}
2935
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002936int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002937{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002938 int ret;
bellard83f64092006-08-01 16:21:11 +00002939
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002940 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2941 if (ret < 0) {
2942 return ret;
bellard83f64092006-08-01 16:21:11 +00002943 }
2944
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002945 return qiov->size;
2946}
2947
2948int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002949 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002950{
2951 QEMUIOVector qiov;
2952 struct iovec iov = {
2953 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002954 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002955 };
2956
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002957 if (bytes < 0) {
2958 return -EINVAL;
2959 }
2960
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002961 qemu_iovec_init_external(&qiov, &iov, 1);
2962 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002963}
bellard83f64092006-08-01 16:21:11 +00002964
Kevin Wolff08145f2010-06-16 16:38:15 +02002965/*
2966 * Writes to the file and ensures that no writes are reordered across this
2967 * request (acts as a barrier)
2968 *
2969 * Returns 0 on success, -errno in error cases.
2970 */
2971int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2972 const void *buf, int count)
2973{
2974 int ret;
2975
2976 ret = bdrv_pwrite(bs, offset, buf, count);
2977 if (ret < 0) {
2978 return ret;
2979 }
2980
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002981 /* No flush needed for cache modes that already do it */
2982 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002983 bdrv_flush(bs);
2984 }
2985
2986 return 0;
2987}
2988
/*
 * Copy-on-read implementation: read a whole cluster through a bounce buffer,
 * write it back into this image so future reads are served locally, then
 * copy the originally requested range out to the caller's qiov.
 *
 * Returns 0 on success, -errno on error (read, write, or allocation failure).
 */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
                           &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
    if (bounce_buffer == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    /* If the cluster turned out to be all zeroes, prefer the driver's
     * efficient write-zeroes operation over writing the bounce buffer. */
    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors, 0);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* Hand back only the sub-range the caller actually asked for. */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
                        nb_sectors * BDRV_SECTOR_SIZE);

err:
    /* qemu_vfree(NULL) is a no-op, so this is safe on the alloc-failure path */
    qemu_vfree(bounce_buffer);
    return ret;
}
3059
/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read and zeroing after EOF; any other features must be
 * implemented by the caller.
 *
 * offset/bytes must be multiples of BDRV_SECTOR_SIZE (asserted below);
 * 'align' is the request alignment used to round the read-up-to-EOF case.
 * Returns 0 on success, -errno on error.
 */
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap.  This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster.  For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    wait_serialising_requests(req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        /* Only go through the CoR path if some part of the range is not
         * yet allocated in this image. */
        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    /* Forward the request to the BlockDriver */
    if (!bs->zero_beyond_eof) {
        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
    } else {
        /* Read zeros after EOF */
        int64_t total_sectors, max_nb_sectors;

        total_sectors = bdrv_nb_sectors(bs);
        if (total_sectors < 0) {
            ret = total_sectors;
            goto out;
        }

        /* Largest in-file read, rounded up to the request alignment */
        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                  align >> BDRV_SECTOR_BITS);
        if (nb_sectors < max_nb_sectors) {
            ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
        } else if (max_nb_sectors > 0) {
            QEMUIOVector local_qiov;

            /* Trim the qiov to the part that lies inside the file */
            qemu_iovec_init(&local_qiov, qiov->niov);
            qemu_iovec_concat(&local_qiov, qiov, 0,
                              max_nb_sectors * BDRV_SECTOR_SIZE);

            ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
                                     &local_qiov);

            qemu_iovec_destroy(&local_qiov);
        } else {
            ret = 0;
        }

        /* Reading beyond end of file is supposed to produce zeroes */
        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
            /* NOTE(review): these locals shadow the function parameters and,
             * despite the names, 'offset' here is a sector count relative to
             * the request start, not a byte offset. */
            uint64_t offset = MAX(0, total_sectors - sector_num);
            uint64_t bytes = (sector_num + nb_sectors - offset) *
                              BDRV_SECTOR_SIZE;
            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
        }
    }

out:
    return ret;
}
3149
Fam Zhengfc3959e2015-03-24 09:23:49 +08003150static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3151{
3152 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3153 return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3154}
3155
3156static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3157 int64_t offset, size_t bytes)
3158{
3159 int64_t align = bdrv_get_align(bs);
3160 return !(offset & (align - 1) || (bytes & (align - 1)));
3161}
3162
/*
 * Handle a read request in coroutine context
 *
 * Validates the byte range, applies I/O throttling, pads the request to the
 * device alignment with bounce buffers if necessary, and forwards it as a
 * tracked request to bdrv_aligned_preadv().  Returns 0 or -errno.
 */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    uint64_t align = bdrv_get_align(bs);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    /* Device-wide copy-on-read turns every read into a CoR request */
    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, false);
    }

    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        /* Prepend a scratch head so the driver sees an aligned start */
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* Append a scratch tail so the driver sees an aligned end */
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    tracked_request_begin(&req, bs, offset, bytes, false);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}
3237
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003238static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3239 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3240 BdrvRequestFlags flags)
3241{
Peter Lieven75af1f32015-02-06 11:54:11 +01003242 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003243 return -EINVAL;
3244 }
3245
3246 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3247 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3248}
3249
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003250int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003251 int nb_sectors, QEMUIOVector *qiov)
3252{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003253 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003254
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003255 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3256}
3257
3258int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3259 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3260{
3261 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3262
3263 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3264 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003265}
3266
Peter Lieven98764152015-02-02 15:48:34 +01003267#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
Peter Lievenc31cb702013-10-24 12:06:58 +02003268
/*
 * Write zeroes to a sector range, splitting the work into chunks that
 * respect the driver's write_zeroes alignment and size limits.  Uses the
 * driver's efficient write-zeroes operation when available and falls back
 * to writing a zeroed bounce buffer otherwise.
 *
 * Returns 0 on success, -errno on the first failing chunk.
 */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    /* iov.iov_base doubles as the cached bounce buffer across iterations;
     * NULL means "not allocated yet". */
    struct iovec iov = {0};
    int ret = 0;

    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
                                        BDRV_REQUEST_MAX_SECTORS);

    while (nb_sectors > 0 && !ret) {
        int num = nb_sectors;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned.
         */
        if (bs->bl.write_zeroes_alignment
            && num > bs->bl.write_zeroes_alignment) {
            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
                /* Make a small request up to the first aligned sector. */
                num = bs->bl.write_zeroes_alignment;
                num -= sector_num % bs->bl.write_zeroes_alignment;
            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
                /* Shorten the request to the last aligned sector.  num cannot
                 * underflow because num > bs->bl.write_zeroes_alignment.
                 */
                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
            }
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_write_zeroes) {
            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
                                            MAX_WRITE_ZEROES_BOUNCE_BUFFER);
            num = MIN(num, max_xfer_len);
            iov.iov_len = num * BDRV_SECTOR_SIZE;
            if (iov.iov_base == NULL) {
                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
                if (iov.iov_base == NULL) {
                    ret = -ENOMEM;
                    goto fail;
                }
                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);

            /* Keep bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_xfer_len) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        sector_num += num;
        nb_sectors -= num;
    }

fail:
    /* Free a cached bounce buffer, if any (qemu_vfree(NULL) is a no-op) */
    qemu_vfree(iov.iov_base);
    return ret;
}
3346
/*
 * Forwards an already correctly aligned write request to the BlockDriver.
 *
 * Runs the before-write notifiers, optionally converts all-zero payloads
 * into a write-zeroes operation (detect-zeroes), performs the write, and
 * flushes afterwards when the cache mode requires it.  Also updates the
 * dirty bitmap, accounting, and bs->total_sectors on growth.
 * Returns 0 on success, -errno on error.
 */
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert(!qiov || bytes == qiov->size);

    /* If we had to wait, our overlap request must not itself be serialising
     * (the caller established req before waiting) */
    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

    /* detect-zeroes: promote an all-zero payload to a write-zeroes request */
    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
        qemu_iovec_is_zero(qiov)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
    } else {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }
    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);

    /* Writethrough cache modes flush after every successful write */
    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
    }

    bdrv_set_dirty(bs, sector_num, nb_sectors);

    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);

    /* A successful write may have grown the device */
    if (ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
    }

    return ret;
}
3406
/*
 * Handle a write request in coroutine context
 *
 * Validates the request, applies throttling, and aligns it to the device
 * alignment by performing a read-modify-write cycle on the unaligned head
 * and/or tail before forwarding to bdrv_aligned_pwritev().
 * Returns 0 on success, -errno on error.
 */
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BdrvTrackedRequest req;
    uint64_t align = bdrv_get_align(bs);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }

    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, true);
    }

    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, true);

    if (offset & (align - 1)) {
        /* Unaligned head: read the first 'align' bytes, then prepend the
         * part before 'offset' to the caller's data. */
        QEMUIOVector head_qiov;
        struct iovec head_iov;

        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base = head_buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        /* Unaligned tail: read the last 'align' bytes and append the part
         * past the request's end. */
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        /* If head padding already ran, we must not have waited again here */
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base = tail_buf,
            .iov_len = align,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                  align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
    }

    if (use_local_qiov) {
        /* Local buffer may have non-zero data. */
        flags &= ~BDRV_REQ_ZERO_WRITE;
    }
    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

fail:
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    /* qemu_vfree(NULL) is a no-op for the padding buffers never allocated */
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);

    return ret;
}
3533
Kevin Wolf66015532013-12-03 14:40:18 +01003534static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3535 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3536 BdrvRequestFlags flags)
3537{
Peter Lieven75af1f32015-02-06 11:54:11 +01003538 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf66015532013-12-03 14:40:18 +01003539 return -EINVAL;
3540 }
3541
3542 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3543 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3544}
3545
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003546int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3547 int nb_sectors, QEMUIOVector *qiov)
3548{
3549 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3550
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003551 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3552}
3553
/*
 * Public sector-based write-zeroes entry point.
 *
 * Aligned requests go straight through the write path with
 * BDRV_REQ_ZERO_WRITE and no payload; unaligned requests fall back to
 * writing an explicitly zeroed buffer so the RMW padding in
 * bdrv_co_do_pwritev sees real data.  Returns 0 or -errno.
 */
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors,
                                      BdrvRequestFlags flags)
{
    int ret;

    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);

    /* Discarding is only permitted when the image was opened with UNMAP */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        flags &= ~BDRV_REQ_MAY_UNMAP;
    }
    if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
                            nb_sectors << BDRV_SECTOR_BITS)) {
        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
                                BDRV_REQ_ZERO_WRITE | flags);
    } else {
        uint8_t *buf;
        QEMUIOVector local_qiov;
        size_t bytes = nb_sectors << BDRV_SECTOR_BITS;

        /* Unaligned request: supply a zeroed payload buffer instead of
         * a NULL qiov */
        buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
        memset(buf, 0, bytes);
        qemu_iovec_init(&local_qiov, 1);
        qemu_iovec_add(&local_qiov, buf, bytes);

        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
                                BDRV_REQ_ZERO_WRITE | flags);
        qemu_vfree(buf);
    }
    return ret;
}
3585
bellard83f64092006-08-01 16:21:11 +00003586/**
bellard83f64092006-08-01 16:21:11 +00003587 * Truncate file to 'offset' bytes (needed only for file protocols)
3588 */
3589int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3590{
3591 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003592 int ret;
bellard83f64092006-08-01 16:21:11 +00003593 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003594 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003595 if (!drv->bdrv_truncate)
3596 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003597 if (bs->read_only)
3598 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003599
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003600 ret = drv->bdrv_truncate(bs, offset);
3601 if (ret == 0) {
3602 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003603 if (bs->blk) {
3604 blk_dev_resize_cb(bs->blk);
3605 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003606 }
3607 return ret;
bellard83f64092006-08-01 16:21:11 +00003608}
3609
3610/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003611 * Length of a allocated file in bytes. Sparse files are counted by actual
3612 * allocated space. Return < 0 if error or unknown.
3613 */
3614int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3615{
3616 BlockDriver *drv = bs->drv;
3617 if (!drv) {
3618 return -ENOMEDIUM;
3619 }
3620 if (drv->bdrv_get_allocated_file_size) {
3621 return drv->bdrv_get_allocated_file_size(bs);
3622 }
3623 if (bs->file) {
3624 return bdrv_get_allocated_file_size(bs->file);
3625 }
3626 return -ENOTSUP;
3627}
3628
3629/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003630 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003631 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003632int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003633{
3634 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003635
bellard83f64092006-08-01 16:21:11 +00003636 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003637 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003638
Kevin Wolfb94a2612013-10-29 12:18:58 +01003639 if (drv->has_variable_length) {
3640 int ret = refresh_total_sectors(bs, bs->total_sectors);
3641 if (ret < 0) {
3642 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003643 }
bellard83f64092006-08-01 16:21:11 +00003644 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003645 return bs->total_sectors;
3646}
3647
3648/**
3649 * Return length in bytes on success, -errno on error.
3650 * The length is always a multiple of BDRV_SECTOR_SIZE.
3651 */
3652int64_t bdrv_getlength(BlockDriverState *bs)
3653{
3654 int64_t ret = bdrv_nb_sectors(bs);
3655
3656 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003657}
3658
bellard19cb3732006-08-19 11:45:59 +00003659/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003660void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003661{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003662 int64_t nb_sectors = bdrv_nb_sectors(bs);
3663
3664 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003665}
bellardcf989512004-02-16 21:56:36 +00003666
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003667void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3668 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003669{
3670 bs->on_read_error = on_read_error;
3671 bs->on_write_error = on_write_error;
3672}
3673
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003674BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003675{
3676 return is_read ? bs->on_read_error : bs->on_write_error;
3677}
3678
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003679BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3680{
3681 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3682
3683 switch (on_err) {
3684 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003685 return (error == ENOSPC) ?
3686 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003687 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003688 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003689 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003690 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003691 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003692 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003693 default:
3694 abort();
3695 }
3696}
3697
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003698static void send_qmp_error_event(BlockDriverState *bs,
3699 BlockErrorAction action,
3700 bool is_read, int error)
3701{
Peter Maydell573742a2014-10-10 20:33:03 +01003702 IoOperationType optype;
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003703
Peter Maydell573742a2014-10-10 20:33:03 +01003704 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3705 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003706 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003707 error == ENOSPC, strerror(error),
3708 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003709}
3710
/* Apply @action for an I/O error on @bs: emit the BLOCK_IO_ERROR event and,
 * for BLOCK_ERROR_ACTION_STOP, also request that the VM be stopped.
 *
 * This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 */
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
                       bool is_read, int error)
{
    /* @error is a positive errno value, not a negative return code. */
    assert(error >= 0);

    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* First set the iostatus, so that "info block" returns an iostatus
         * that matches the events raised so far (an additional error iostatus
         * is fine, but not a lost one).
         */
        bdrv_iostatus_set_err(bs, error);

        /* Then raise the request to stop the VM and the event.
         * qemu_system_vmstop_request_prepare has two effects.  First,
         * it ensures that the STOP event always comes after the
         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
         * can observe the STOP event and do a "cont" before the STOP
         * event is issued, the VM will not stop.  In this case, vm_start()
         * also ensures that the STOP/RESUME pair of events is emitted.
         */
        qemu_system_vmstop_request_prepare();
        send_qmp_error_event(bs, action, is_read, error);
        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
    } else {
        send_qmp_error_event(bs, action, is_read, error);
    }
}
3742
/* Return non-zero if @bs was opened read-only. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
3747
/* Return non-zero if @bs is a SCSI generic (SG_IO pass-through) device. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
3752
/* Return non-zero if writeback caching is enabled for @bs. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
3757
Paolo Bonzini425b0142012-06-06 00:04:52 +02003758void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3759{
3760 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003761
3762 /* so a reopen() will preserve wce */
3763 if (wce) {
3764 bs->open_flags |= BDRV_O_CACHE_WB;
3765 } else {
3766 bs->open_flags &= ~BDRV_O_CACHE_WB;
3767 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003768}
3769
bellardea2384d2004-08-01 21:59:26 +00003770int bdrv_is_encrypted(BlockDriverState *bs)
3771{
3772 if (bs->backing_hd && bs->backing_hd->encrypted)
3773 return 1;
3774 return bs->encrypted;
3775}
3776
aliguoric0f4ce72009-03-05 23:01:01 +00003777int bdrv_key_required(BlockDriverState *bs)
3778{
3779 BlockDriverState *backing_hd = bs->backing_hd;
3780
3781 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3782 return 1;
3783 return (bs->encrypted && !bs->valid_key);
3784}
3785
/* Set the encryption key of @bs to @key, first recursing into an encrypted
 * backing file if one is present.
 *
 * Returns 0 on success, -EINVAL if @bs is not encrypted, -ENOMEDIUM if
 * there is no driver or the driver cannot take a key, or a negative value
 * from the driver's bdrv_set_key.  When the key first becomes valid, the
 * media-change callback skipped at open time is invoked.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        /* Only the backing file needed the key. */
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        if (bs->blk) {
            /* call the change callback now, we skipped it on open */
            blk_dev_change_media_cb(bs->blk, true);
        }
    }
    return ret;
}
3813
/*
 * Provide an encryption key for @bs.
 * If @key is non-null:
 *     If @bs is not encrypted, fail.
 *     Else if the key is invalid, fail.
 *     Else set @bs's key to @key, replacing the existing key, if any.
 * If @key is null:
 *     If @bs is encrypted and still lacks a key, fail.
 *     Else do nothing.
 * On failure, store an error object through @errp if non-null.
 */
void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
{
    if (key) {
        if (!bdrv_is_encrypted(bs)) {
            error_setg(errp, "Node '%s' is not encrypted",
                      bdrv_get_device_or_node_name(bs));
        } else if (bdrv_set_key(bs, key) < 0) {
            /* bdrv_set_key() rejected the key. */
            error_set(errp, QERR_INVALID_PASSWORD);
        }
    } else {
        if (bdrv_key_required(bs)) {
            /* Distinct error class so management can prompt for a key. */
            error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
                      "'%s' (%s) is encrypted",
                      bdrv_get_device_or_node_name(bs),
                      bdrv_get_encrypted_filename(bs));
        }
    }
}
3843
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003844const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003845{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003846 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003847}
3848
/*
 * qsort() comparator for an array of strings (const char * elements).
 *
 * qsort() hands the comparator pointers to the array *elements*, i.e.
 * pointers to the string pointers, so they must be dereferenced once
 * before comparing.  Passing them to strcmp() directly would compare the
 * byte representation of the pointers themselves, yielding an arbitrary
 * order (and reading past the pointer object).
 */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(const char *const *)a, *(const char *const *)b);
}
3853
ths5fafdf22007-09-16 21:08:06 +00003854void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003855 void *opaque)
3856{
3857 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003858 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003859 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003860 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003861
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003862 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003863 if (drv->format_name) {
3864 bool found = false;
3865 int i = count;
3866 while (formats && i && !found) {
3867 found = !strcmp(formats[--i], drv->format_name);
3868 }
3869
3870 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003871 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003872 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003873 }
3874 }
bellardea2384d2004-08-01 21:59:26 +00003875 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003876
3877 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3878
3879 for (i = 0; i < count; i++) {
3880 it(opaque, formats[i]);
3881 }
3882
Jeff Codye855e4f2014-04-28 18:29:54 -04003883 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003884}
3885
Benoît Canetdc364f42014-01-23 21:31:32 +01003886/* This function is to find a node in the bs graph */
3887BlockDriverState *bdrv_find_node(const char *node_name)
3888{
3889 BlockDriverState *bs;
3890
3891 assert(node_name);
3892
3893 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3894 if (!strcmp(node_name, bs->node_name)) {
3895 return bs;
3896 }
3897 }
3898 return NULL;
3899}
3900
Benoît Canetc13163f2014-01-23 21:31:34 +01003901/* Put this QMP function here so it can access the static graph_bdrv_states. */
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003902BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
Benoît Canetc13163f2014-01-23 21:31:34 +01003903{
3904 BlockDeviceInfoList *list, *entry;
3905 BlockDriverState *bs;
3906
3907 list = NULL;
3908 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003909 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
3910 if (!info) {
3911 qapi_free_BlockDeviceInfoList(list);
3912 return NULL;
3913 }
Benoît Canetc13163f2014-01-23 21:31:34 +01003914 entry = g_malloc0(sizeof(*entry));
Alberto Garciad5a8ee62015-04-17 14:52:43 +03003915 entry->value = info;
Benoît Canetc13163f2014-01-23 21:31:34 +01003916 entry->next = list;
3917 list = entry;
3918 }
3919
3920 return list;
3921}
3922
Benoît Canet12d3ba82014-01-23 21:31:35 +01003923BlockDriverState *bdrv_lookup_bs(const char *device,
3924 const char *node_name,
3925 Error **errp)
3926{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003927 BlockBackend *blk;
3928 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003929
Benoît Canet12d3ba82014-01-23 21:31:35 +01003930 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02003931 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003932
Markus Armbruster7f06d472014-10-07 13:59:12 +02003933 if (blk) {
3934 return blk_bs(blk);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003935 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003936 }
3937
Benoît Canetdd67fa52014-02-12 17:15:06 +01003938 if (node_name) {
3939 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003940
Benoît Canetdd67fa52014-02-12 17:15:06 +01003941 if (bs) {
3942 return bs;
3943 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003944 }
3945
Benoît Canetdd67fa52014-02-12 17:15:06 +01003946 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3947 device ? device : "",
3948 node_name ? node_name : "");
3949 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003950}
3951
Jeff Cody5a6684d2014-06-25 15:40:09 -04003952/* If 'base' is in the same chain as 'top', return true. Otherwise,
3953 * return false. If either argument is NULL, return false. */
3954bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3955{
3956 while (top && top != base) {
3957 top = top->backing_hd;
3958 }
3959
3960 return top != NULL;
3961}
3962
Fam Zheng04df7652014-10-31 11:32:54 +08003963BlockDriverState *bdrv_next_node(BlockDriverState *bs)
3964{
3965 if (!bs) {
3966 return QTAILQ_FIRST(&graph_bdrv_states);
3967 }
3968 return QTAILQ_NEXT(bs, node_list);
3969}
3970
Markus Armbruster2f399b02010-06-02 18:55:20 +02003971BlockDriverState *bdrv_next(BlockDriverState *bs)
3972{
3973 if (!bs) {
3974 return QTAILQ_FIRST(&bdrv_states);
3975 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003976 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003977}
3978
/* Return the graph node name of @bs. */
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
    return bs->node_name;
}
3983
/* TODO check what callers really want: bs->node_name or blk_name() */
const char *bdrv_get_device_name(const BlockDriverState *bs)
{
    /* Empty string (not NULL) when no BlockBackend is attached. */
    return bs->blk ? blk_name(bs->blk) : "";
}
3989
Alberto Garcia9b2aa842015-04-08 12:29:18 +03003990/* This can be used to identify nodes that might not have a device
3991 * name associated. Since node and device names live in the same
3992 * namespace, the result is unambiguous. The exception is if both are
3993 * absent, then this returns an empty (non-null) string. */
3994const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
3995{
3996 return bs->blk ? blk_name(bs->blk) : bs->node_name;
3997}
3998
/* Return the BDRV_O_* flags @bs was opened with. */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}
4003
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02004004int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00004005{
4006 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02004007 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00004008
Benoît Canetdc364f42014-01-23 21:31:32 +01004009 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02004010 AioContext *aio_context = bdrv_get_aio_context(bs);
4011 int ret;
4012
4013 aio_context_acquire(aio_context);
4014 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02004015 if (ret < 0 && !result) {
4016 result = ret;
4017 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02004018 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01004019 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02004020
4021 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00004022}
4023
/* Helper for drivers whose freshly created images always read as zeroes. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
4028
Kevin Wolff2feebb2010-04-14 17:30:35 +02004029int bdrv_has_zero_init(BlockDriverState *bs)
4030{
4031 assert(bs->drv);
4032
Paolo Bonzini11212d82013-09-04 19:00:27 +02004033 /* If BS is a copy on write image, it is initialized to
4034 the contents of the base image, which may not be zeroes. */
4035 if (bs->backing_hd) {
4036 return 0;
4037 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02004038 if (bs->drv->bdrv_has_zero_init) {
4039 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02004040 }
4041
Peter Lieven3ac21622013-06-28 12:47:42 +02004042 /* safe default */
4043 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02004044}
4045
Peter Lieven4ce78692013-10-24 12:06:54 +02004046bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
4047{
4048 BlockDriverInfo bdi;
4049
4050 if (bs->backing_hd) {
4051 return false;
4052 }
4053
4054 if (bdrv_get_info(bs, &bdi) == 0) {
4055 return bdi.unallocated_blocks_are_zero;
4056 }
4057
4058 return false;
4059}
4060
4061bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
4062{
4063 BlockDriverInfo bdi;
4064
4065 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
4066 return false;
4067 }
4068
4069 if (bdrv_get_info(bs, &bdi) == 0) {
4070 return bdi.can_write_zeroes_with_unmap;
4071 }
4072
4073 return false;
4074}
4075
/* Arguments and result of a bdrv_co_get_block_status() coroutine call,
 * used to marshal state through the synchronous wrapper below. */
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;
    BlockDriverState *base;
    int64_t sector_num;     /* first sector to query */
    int nb_sectors;         /* maximum number of sectors to consider */
    int *pnum;              /* out: sectors sharing the returned state */
    int64_t ret;            /* out: BDRV_BLOCK_* flags or negative errno */
    bool done;              /* set once the coroutine has completed */
} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004085
/*
 * Returns the allocation status of the specified sectors.
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
                                                     int64_t sector_num,
                                                     int nb_sectors, int *pnum)
{
    int64_t total_sectors;
    int64_t n;
    int64_t ret, ret2;

    total_sectors = bdrv_nb_sectors(bs);
    if (total_sectors < 0) {
        return total_sectors;
    }

    /* Query entirely past EOF: nothing to report. */
    if (sector_num >= total_sectors) {
        *pnum = 0;
        return 0;
    }

    /* Clamp the request to the end of the image. */
    n = total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    if (!bs->drv->bdrv_co_get_block_status) {
        /* Driver has no status callback: report everything as allocated
         * data; protocol drivers can also report a valid host offset. */
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
        }
        return ret;
    }

    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret < 0) {
        *pnum = 0;
        return ret;
    }

    if (ret & BDRV_BLOCK_RAW) {
        /* Raw pass-through: re-query the protocol layer at the mapped
         * offset instead. */
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                     *pnum, pnum);
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    }

    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
        /* Unallocated in this layer: it may still read as zeroes, either
         * by driver guarantee or because it lies past the backing file's
         * end. */
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing_hd) {
            BlockDriverState *bs2 = bs->backing_hd;
            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    if (bs->file &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        int file_pnum;

        /* Refine the answer with the protocol layer's view of the mapped
         * range. */
        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                        *pnum, &file_pnum);
        if (ret2 >= 0) {
            /* Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
            if (!file_pnum) {
                /* !file_pnum indicates an offset at or beyond the EOF; it is
                 * perfectly valid for the format block driver to point to such
                 * offsets, so catch it and mark everything as zero */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
                *pnum = file_pnum;
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
    }

    return ret;
}
4187
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004188/* Coroutine wrapper for bdrv_get_block_status() */
4189static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004190{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004191 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004192 BlockDriverState *bs = data->bs;
4193
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004194 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4195 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004196 data->done = true;
4197}
4198
/*
 * Synchronous wrapper around bdrv_co_get_block_status().
 *
 * See bdrv_co_get_block_status() for details.
 */
int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors, int *pnum)
{
    Coroutine *co;
    BdrvCoGetBlockStatusData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_get_block_status_co_entry(&data);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        /* Spawn the coroutine and drive the BDS's AioContext until it
         * signals completion via data.done. */
        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
        qemu_coroutine_enter(co, &data);
        while (!data.done) {
            aio_poll(aio_context, true);
        }
    }
    return data.ret;
}
4230
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004231int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4232 int nb_sectors, int *pnum)
4233{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004234 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4235 if (ret < 0) {
4236 return ret;
4237 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004238 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004239}
4240
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (inclusive).  BASE can be NULL to check if the given
 * sector is allocated in any image of the chain.  Return false otherwise.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 *  the specified sector) that are known to be in the same
 *  allocated/unallocated state.
 *
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t sector_num,
                            int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    int ret, n = nb_sectors;

    intermediate = top;
    while (intermediate && intermediate != base) {
        int pnum_inter;
        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
                                &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            /* Allocated somewhere in the chain: done. */
            *pnum = pnum_inter;
            return 1;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nb_sectors] allocated.
         */
        /* Shrink the reported run so it never extends past a region some
         * deeper layer might answer differently about. */
        if (n > pnum_inter &&
            (intermediate == top ||
             sector_num + pnum_inter < intermediate->total_sectors)) {
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    *pnum = n;
    return 0;
}
4291
aliguori045df332009-03-05 23:00:48 +00004292const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4293{
4294 if (bs->backing_hd && bs->backing_hd->encrypted)
4295 return bs->backing_file;
4296 else if (bs->encrypted)
4297 return bs->filename;
4298 else
4299 return NULL;
4300}
4301
/* Copy the backing file name of @bs into @filename, limited to
 * @filename_size bytes (pstrcpy truncates and NUL-terminates). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
4307
ths5fafdf22007-09-16 21:08:06 +00004308int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00004309 const uint8_t *buf, int nb_sectors)
4310{
4311 BlockDriver *drv = bs->drv;
Max Reitzb9c64942015-02-05 13:58:25 -05004312 int ret;
4313
4314 if (!drv) {
bellard19cb3732006-08-19 11:45:59 +00004315 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05004316 }
4317 if (!drv->bdrv_write_compressed) {
bellardfaea38e2006-08-05 21:31:00 +00004318 return -ENOTSUP;
Max Reitzb9c64942015-02-05 13:58:25 -05004319 }
4320 ret = bdrv_check_request(bs, sector_num, nb_sectors);
4321 if (ret < 0) {
4322 return ret;
4323 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004324
Fam Zhenge4654d22013-11-13 18:29:43 +08004325 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004326
bellardfaea38e2006-08-05 21:31:00 +00004327 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4328}
ths3b46e622007-09-17 08:09:54 +00004329
bellardfaea38e2006-08-05 21:31:00 +00004330int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4331{
4332 BlockDriver *drv = bs->drv;
4333 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004334 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004335 if (!drv->bdrv_get_info)
4336 return -ENOTSUP;
4337 memset(bdi, 0, sizeof(*bdi));
4338 return drv->bdrv_get_info(bs, bdi);
4339}
4340
Max Reitzeae041f2013-10-09 10:46:16 +02004341ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4342{
4343 BlockDriver *drv = bs->drv;
4344 if (drv && drv->bdrv_get_specific_info) {
4345 return drv->bdrv_get_specific_info(bs);
4346 }
4347 return NULL;
4348}
4349
Christoph Hellwig45566e92009-07-10 23:11:57 +02004350int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4351 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004352{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004353 QEMUIOVector qiov;
4354 struct iovec iov = {
4355 .iov_base = (void *) buf,
4356 .iov_len = size,
4357 };
4358
4359 qemu_iovec_init_external(&qiov, &iov, 1);
4360 return bdrv_writev_vmstate(bs, &qiov, pos);
4361}
4362
4363int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4364{
aliguori178e08a2009-04-05 19:10:55 +00004365 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004366
4367 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004368 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004369 } else if (drv->bdrv_save_vmstate) {
4370 return drv->bdrv_save_vmstate(bs, qiov, pos);
4371 } else if (bs->file) {
4372 return bdrv_writev_vmstate(bs->file, qiov, pos);
4373 }
4374
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004375 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004376}
4377
Christoph Hellwig45566e92009-07-10 23:11:57 +02004378int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4379 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004380{
4381 BlockDriver *drv = bs->drv;
4382 if (!drv)
4383 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004384 if (drv->bdrv_load_vmstate)
4385 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4386 if (bs->file)
4387 return bdrv_load_vmstate(bs->file, buf, pos, size);
4388 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004389}
4390
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004391void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4392{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004393 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004394 return;
4395 }
4396
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004397 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004398}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004399
Kevin Wolf41c695c2012-12-06 14:32:58 +01004400int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4401 const char *tag)
4402{
4403 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4404 bs = bs->file;
4405 }
4406
4407 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4408 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4409 }
4410
4411 return -ENOTSUP;
4412}
4413
Fam Zheng4cc70e92013-11-20 10:01:54 +08004414int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4415{
4416 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4417 bs = bs->file;
4418 }
4419
4420 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4421 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4422 }
4423
4424 return -ENOTSUP;
4425}
4426
Kevin Wolf41c695c2012-12-06 14:32:58 +01004427int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4428{
Max Reitz938789e2014-03-10 23:44:08 +01004429 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004430 bs = bs->file;
4431 }
4432
4433 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4434 return bs->drv->bdrv_debug_resume(bs, tag);
4435 }
4436
4437 return -ENOTSUP;
4438}
4439
4440bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4441{
4442 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4443 bs = bs->file;
4444 }
4445
4446 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4447 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4448 }
4449
4450 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004451}
4452
Blue Swirl199630b2010-07-25 20:49:34 +00004453int bdrv_is_snapshot(BlockDriverState *bs)
4454{
4455 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4456}
4457
/* backing_file can either be relative, or absolute, or a protocol.  If it is
 * relative, it must be relative to the chain.  So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain.
 *
 * Returns the BDS in bs's backing chain whose filename matches
 * @backing_file, or NULL if none matches. */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
                                          const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    /* Scratch buffers for path combination and realpath() resolution */
    filename_full = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {
                /* path cannot be resolved (e.g. dangling link) — not a match */
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    return retval;
}
4523
Benoît Canetf198fd12012-08-02 10:22:47 +02004524int bdrv_get_backing_file_depth(BlockDriverState *bs)
4525{
4526 if (!bs->drv) {
4527 return 0;
4528 }
4529
4530 if (!bs->backing_hd) {
4531 return 0;
4532 }
4533
4534 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4535}
4536
bellard83f64092006-08-01 16:21:11 +00004537/**************************************************************/
4538/* async I/Os */
4539
/* Submit an asynchronous read of @nb_sectors starting at @sector_num into
 * @qiov.  @cb is invoked with the result on completion; the returned AIOCB
 * can be used with bdrv_aio_cancel(). */
BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                           QEMUIOVector *qiov, int nb_sectors,
                           BlockCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
                                 cb, opaque, false);
}

/* Asynchronous counterpart of bdrv_aio_readv() for writes. */
BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                            QEMUIOVector *qiov, int nb_sectors,
                            BlockCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
                                 cb, opaque, true);
}

/* Asynchronously write zeroes; implemented as a write with a NULL qiov
 * and the BDRV_REQ_ZERO_WRITE flag (plus any caller-supplied @flags). */
BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
        BlockCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
                                 BDRV_REQ_ZERO_WRITE | flags,
                                 cb, opaque, true);
}
4570
Kevin Wolf40b4f532009-09-09 17:53:37 +02004571
/* Shared completion state for one bdrv_aio_multiwrite() call.  One of
 * these is allocated per multiwrite; the flexible callbacks[] array has
 * one entry per ORIGINAL caller request (before merging). */
typedef struct MultiwriteCB {
    int error;          /* first error seen so far, 0 if none */
    int num_requests;   /* merged requests still in flight */
    int num_callbacks;  /* number of original caller requests */
    struct {
        BlockCompletionFunc *cb;
        void *opaque;
        QEMUIOVector *free_qiov;  /* merged qiov to destroy/free, or NULL */
    } callbacks[];
} MultiwriteCB;
4582
4583static void multiwrite_user_cb(MultiwriteCB *mcb)
4584{
4585 int i;
4586
4587 for (i = 0; i < mcb->num_callbacks; i++) {
4588 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004589 if (mcb->callbacks[i].free_qiov) {
4590 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4591 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004592 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004593 }
4594}
4595
4596static void multiwrite_cb(void *opaque, int ret)
4597{
4598 MultiwriteCB *mcb = opaque;
4599
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004600 trace_multiwrite_cb(mcb, ret);
4601
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004602 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004603 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004604 }
4605
4606 mcb->num_requests--;
4607 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004608 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004609 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004610 }
4611}
4612
4613static int multiwrite_req_compare(const void *a, const void *b)
4614{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004615 const BlockRequest *req1 = a, *req2 = b;
4616
4617 /*
4618 * Note that we can't simply subtract req2->sector from req1->sector
4619 * here as that could overflow the return value.
4620 */
4621 if (req1->sector > req2->sector) {
4622 return 1;
4623 } else if (req1->sector < req2->sector) {
4624 return -1;
4625 } else {
4626 return 0;
4627 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004628}
4629
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                            int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // Don't exceed the per-request iovec limit when combining
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        // Respect the driver's advertised maximum transfer length, if any
        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);

            // We shouldn't need to add any zeros between the two requests:
            // the merge condition above guarantees they are sequential or
            // overlapping.
            assert (reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);

            // Add tail of first request, if necessary
            if (qiov->size < reqs[outidx].qiov->size) {
                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
                                  reqs[outidx].qiov->size - qiov->size);
            }

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // Record the merged qiov so multiwrite_user_cb() can free it
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);

    return outidx + 1;
}
4702
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure (callbacks[] sized for every
    // original request, even ones later merged away)
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergeable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
                              reqs[i].nb_sectors, reqs[i].flags,
                              multiwrite_cb, mcb,
                              true);
    }

    return 0;
}
4760
/* Synchronously cancel @acb: request asynchronous cancellation, then poll
 * the request's AioContext until all other references are gone, i.e. the
 * completion callback has run.  The extra ref taken here keeps the AIOCB
 * alive while we poll. */
void bdrv_aio_cancel(BlockAIOCB *acb)
{
    qemu_aio_ref(acb);
    bdrv_aio_cancel_async(acb);
    while (acb->refcnt > 1) {
        if (acb->aiocb_info->get_aio_context) {
            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
        } else if (acb->bs) {
            aio_poll(bdrv_get_aio_context(acb->bs), true);
        } else {
            /* no way to determine which context to poll — cannot make
             * progress */
            abort();
        }
    }
    qemu_aio_unref(acb);
}
4776
/* Async version of aio cancel. The caller is not blocked if the acb implements
 * cancel_async, otherwise we do nothing and let the request normally complete.
 * In either case the completion callback must be called. */
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
    if (acb->aiocb_info->cancel_async) {
        acb->aiocb_info->cancel_async(acb);
    }
}
4786
4787/**************************************************************/
4788/* async block device emulation */
4789
/* AIOCB for the synchronous-emulation path: used when a driver only
 * provides bdrv_read/bdrv_write.  I/O goes through a linear bounce
 * buffer and completion is delivered from a bottom half. */
typedef struct BlockAIOCBSync {
    BlockAIOCB common;
    QEMUBH *bh;        /* bottom half delivering the completion */
    int ret;           /* result reported to the caller */
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;   /* linear bounce buffer; NULL if allocation failed */
    int is_write;
} BlockAIOCBSync;

static const AIOCBInfo bdrv_em_aiocb_info = {
    .aiocb_size = sizeof(BlockAIOCBSync),
};
4803
/* Bottom half of the synchronous-emulation path: for successful reads,
 * copy the bounce buffer back into the caller's qiov, then free the
 * bounce buffer, deliver the completion and drop the AIOCB. */
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockAIOCBSync *acb = opaque;

    if (!acb->is_write && acb->ret >= 0) {
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_unref(acb);
}
bellardbeac80c2006-06-26 20:08:57 +00004817
/* Emulate AIO on top of a driver's synchronous bdrv_read/bdrv_write:
 * perform the I/O immediately through a bounce buffer and schedule a
 * bottom half to deliver the completion asynchronously.  On bounce
 * allocation failure the request completes with -ENOMEM. */
static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                      int64_t sector_num,
                                      QEMUIOVector *qiov,
                                      int nb_sectors,
                                      BlockCompletionFunc *cb,
                                      void *opaque,
                                      int is_write)

{
    BlockAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    acb->bounce = qemu_try_blockalign(bs, qiov->size);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);

    if (acb->bounce == NULL) {
        acb->ret = -ENOMEM;
    } else if (is_write) {
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
4848
/* Emulated-AIO entry points installed for drivers that only implement
 * synchronous read/write (see bdrv_aio_rw_vector). */
static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}

static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
4862
Kevin Wolf68485422011-06-30 10:05:46 +02004863
/* AIOCB for requests executed inside a coroutine.  req.error is set to
 * -EINPROGRESS on submission; completion is delivered either directly or
 * deferred to a bottom half (see bdrv_co_maybe_schedule_bh). */
typedef struct BlockAIOCBCoroutine {
    BlockAIOCB common;
    BlockRequest req;
    bool is_write;
    bool need_bh;   /* completion must not run from the submitter's frame */
    bool *done;
    QEMUBH* bh;
} BlockAIOCBCoroutine;

static const AIOCBInfo bdrv_em_co_aiocb_info = {
    .aiocb_size = sizeof(BlockAIOCBCoroutine),
};
4876
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004877static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
4878{
4879 if (!acb->need_bh) {
4880 acb->common.cb(acb->common.opaque, acb->req.error);
4881 qemu_aio_unref(acb);
4882 }
4883}
4884
/* Bottom half that finishes a coroutine AIOCB whose request completed
 * before the submitter returned. */
static void bdrv_co_em_bh(void *opaque)
{
    BlockAIOCBCoroutine *acb = opaque;

    /* need_bh was cleared before this BH was scheduled */
    assert(!acb->need_bh);
    qemu_bh_delete(acb->bh);
    bdrv_co_complete(acb);
}

/* Called after entering the request coroutine: clear need_bh and, if the
 * request already finished, schedule a bottom half so the completion
 * callback does not run from the submitter's stack frame. */
static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
{
    acb->need_bh = false;
    if (acb->req.error != -EINPROGRESS) {
        BlockDriverState *bs = acb->common.bs;

        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
        qemu_bh_schedule(acb->bh);
    }
}
4904
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004905/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4906static void coroutine_fn bdrv_co_do_rw(void *opaque)
4907{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004908 BlockAIOCBCoroutine *acb = opaque;
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004909 BlockDriverState *bs = acb->common.bs;
4910
4911 if (!acb->is_write) {
4912 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004913 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004914 } else {
4915 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004916 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004917 }
4918
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004919 bdrv_co_complete(acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004920}
4921
/* Common AIO submission path: allocate a coroutine AIOCB, fill in the
 * request, start the bdrv_co_do_rw coroutine and arrange for completion
 * to be deferred to a bottom half if it happened synchronously. */
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BdrvRequestFlags flags,
                                         BlockCompletionFunc *cb,
                                         void *opaque,
                                         bool is_write)
{
    Coroutine *co;
    BlockAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->need_bh = true;
    acb->req.error = -EINPROGRESS;   /* marks the request as in flight */
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->req.flags = flags;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    bdrv_co_maybe_schedule_bh(acb);
    return &acb->common;
}
4949
Paolo Bonzini07f07612011-10-17 12:32:12 +02004950static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004951{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004952 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004953 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004954
Paolo Bonzini07f07612011-10-17 12:32:12 +02004955 acb->req.error = bdrv_co_flush(bs);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004956 bdrv_co_complete(acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004957}
4958
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004959BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004960 BlockCompletionFunc *cb, void *opaque)
Alexander Graf016f5cf2010-05-26 17:51:49 +02004961{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004962 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004963
Paolo Bonzini07f07612011-10-17 12:32:12 +02004964 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004965 BlockAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004966
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004967 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004968 acb->need_bh = true;
4969 acb->req.error = -EINPROGRESS;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004970
Paolo Bonzini07f07612011-10-17 12:32:12 +02004971 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4972 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004973
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004974 bdrv_co_maybe_schedule_bh(acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004975 return &acb->common;
4976}
4977
Paolo Bonzini4265d622011-10-17 12:32:14 +02004978static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4979{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004980 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004981 BlockDriverState *bs = acb->common.bs;
4982
4983 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004984 bdrv_co_complete(acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004985}
4986
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004987BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
Paolo Bonzini4265d622011-10-17 12:32:14 +02004988 int64_t sector_num, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004989 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzini4265d622011-10-17 12:32:14 +02004990{
4991 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004992 BlockAIOCBCoroutine *acb;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004993
4994 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4995
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004996 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01004997 acb->need_bh = true;
4998 acb->req.error = -EINPROGRESS;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004999 acb->req.sector = sector_num;
5000 acb->req.nb_sectors = nb_sectors;
5001 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
5002 qemu_coroutine_enter(co, acb);
5003
Paolo Bonzini0b5a2442015-03-28 07:37:18 +01005004 bdrv_co_maybe_schedule_bh(acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005005 return &acb->common;
5006}
5007
/* Register all built-in block drivers. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}

/* Like bdrv_init(), but additionally enable the configured block driver
 * whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
5018
/* Allocate and initialize an AIOCB of the size/type described by
 * @aiocb_info.  The returned AIOCB starts with refcnt == 1; release it
 * with qemu_aio_unref(). */
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
                   BlockCompletionFunc *cb, void *opaque)
{
    BlockAIOCB *acb;

    acb = g_slice_alloc(aiocb_info->aiocb_size);
    acb->aiocb_info = aiocb_info;
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    acb->refcnt = 1;
    return acb;
}
5032
/* Take an additional reference on an AIOCB. */
void qemu_aio_ref(void *p)
{
    BlockAIOCB *acb = p;
    acb->refcnt++;
}

/* Drop a reference on an AIOCB; frees it when the count reaches zero. */
void qemu_aio_unref(void *p)
{
    BlockAIOCB *acb = p;
    assert(acb->refcnt > 0);
    if (--acb->refcnt == 0) {
        g_slice_free1(acb->aiocb_info->aiocb_size, acb);
    }
}
bellard19cb3732006-08-19 11:45:59 +00005047
5048/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005049/* Coroutine block device emulation */
5050
/* Bridges a driver's callback-style AIO completion to a waiting
 * coroutine (see bdrv_co_io_em). */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to re-enter on completion */
    int ret;               /* result handed to the completion callback */
} CoroutineIOCompletion;

/* AIO completion callback: record the result and wake the coroutine. */
static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
5063
/* Emulate a coroutine read/write on top of the driver's callback-based
 * AIO interface: submit the request, yield until bdrv_co_io_em_complete()
 * re-enters us, then return the request's result.  Returns -EIO if the
 * driver fails to even create the AIOCB. */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();

    return co.ret;
}
5089
/* Coroutine read/write entry points implemented on top of the driver's
 * AIO interface (see bdrv_co_io_em). */
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}

static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
5103
/* Coroutine entry point used by the synchronous flush wrapper: runs
 * bdrv_co_flush() and stores the result in the shared RwCo. */
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
5110
/*
 * Flush a BlockDriverState from coroutine context.
 *
 * Ordering: first flush internal caches to the OS (always, even with
 * cache=unsafe), then force data to stable storage (skipped for
 * BDRV_O_NO_FLUSH), then recurse into bs->file so the protocol layer is
 * flushed as well.  Returns 0 if there is nothing to flush (no medium,
 * read-only, or NULL bs).
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Fall back to the AIO flush interface: submit and yield until
         * bdrv_co_io_em_complete() wakes us with the result. */
        BlockAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bdrv_co_flush(bs->file);
}
5173
/*
 * Invalidate cached metadata after incoming migration so the destination
 * re-reads consistent on-disk state.  Only acts on BDRV_O_INCOMING nodes;
 * clears that flag, delegates to the driver (or recurses into bs->file),
 * and refreshes the total sector count.  Errors are reported via @errp.
 */
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    Error *local_err = NULL;
    int ret;

    if (!bs->drv) {
        return;
    }

    if (!(bs->open_flags & BDRV_O_INCOMING)) {
        return;
    }
    bs->open_flags &= ~BDRV_O_INCOMING;

    if (bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs, &local_err);
    } else if (bs->file) {
        /* No driver hook: invalidate the protocol layer instead. */
        bdrv_invalidate_cache(bs->file, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        return;
    }
}
5204
/*
 * Invalidate caches of all registered BlockDriverStates, acquiring each
 * node's AioContext around the call.  Stops at the first failing node and
 * propagates its error.
 */
void bdrv_invalidate_cache_all(Error **errp)
{
    BlockDriverState *bs;
    Error *local_err = NULL;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        /* The context must be held while touching the node, but released
         * before error handling so we never return with it held. */
        aio_context_acquire(aio_context);
        bdrv_invalidate_cache(bs, &local_err);
        aio_context_release(aio_context);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}
5222
Paolo Bonzini07f07612011-10-17 12:32:12 +02005223int bdrv_flush(BlockDriverState *bs)
5224{
5225 Coroutine *co;
5226 RwCo rwco = {
5227 .bs = bs,
5228 .ret = NOT_DONE,
5229 };
5230
5231 if (qemu_in_coroutine()) {
5232 /* Fast-path if already in coroutine context */
5233 bdrv_flush_co_entry(&rwco);
5234 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005235 AioContext *aio_context = bdrv_get_aio_context(bs);
5236
Paolo Bonzini07f07612011-10-17 12:32:12 +02005237 co = qemu_coroutine_create(bdrv_flush_co_entry);
5238 qemu_coroutine_enter(co, &rwco);
5239 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005240 aio_poll(aio_context, true);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005241 }
5242 }
5243
5244 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005245}
5246
/* Arguments and result slot shared between bdrv_discard() and its
 * coroutine entry point. */
typedef struct DiscardCo {
    BlockDriverState *bs;
    int64_t sector_num;     /* first sector to discard */
    int nb_sectors;         /* number of sectors to discard */
    int ret;                /* result; NOT_DONE until the coroutine finishes */
} DiscardCo;
/* Coroutine entry point for bdrv_discard(). */
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    DiscardCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
5259
5260int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5261 int nb_sectors)
5262{
Max Reitzb9c64942015-02-05 13:58:25 -05005263 int max_discard, ret;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005264
Paolo Bonzini4265d622011-10-17 12:32:14 +02005265 if (!bs->drv) {
5266 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05005267 }
5268
5269 ret = bdrv_check_request(bs, sector_num, nb_sectors);
5270 if (ret < 0) {
5271 return ret;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005272 } else if (bs->read_only) {
5273 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005274 }
5275
Fam Zhenge4654d22013-11-13 18:29:43 +08005276 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005277
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005278 /* Do nothing if disabled. */
5279 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5280 return 0;
5281 }
5282
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005283 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005284 return 0;
5285 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005286
Peter Lieven75af1f32015-02-06 11:54:11 +01005287 max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005288 while (nb_sectors > 0) {
5289 int ret;
5290 int num = nb_sectors;
5291
5292 /* align request */
5293 if (bs->bl.discard_alignment &&
5294 num >= bs->bl.discard_alignment &&
5295 sector_num % bs->bl.discard_alignment) {
5296 if (num > bs->bl.discard_alignment) {
5297 num = bs->bl.discard_alignment;
5298 }
5299 num -= sector_num % bs->bl.discard_alignment;
5300 }
5301
5302 /* limit request size */
5303 if (num > max_discard) {
5304 num = max_discard;
5305 }
5306
5307 if (bs->drv->bdrv_co_discard) {
5308 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5309 } else {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005310 BlockAIOCB *acb;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005311 CoroutineIOCompletion co = {
5312 .coroutine = qemu_coroutine_self(),
5313 };
5314
5315 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
5316 bdrv_co_io_em_complete, &co);
5317 if (acb == NULL) {
5318 return -EIO;
5319 } else {
5320 qemu_coroutine_yield();
5321 ret = co.ret;
5322 }
5323 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005324 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005325 return ret;
5326 }
5327
5328 sector_num += num;
5329 nb_sectors -= num;
5330 }
5331 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005332}
5333
5334int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5335{
5336 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005337 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005338 .bs = bs,
5339 .sector_num = sector_num,
5340 .nb_sectors = nb_sectors,
5341 .ret = NOT_DONE,
5342 };
5343
5344 if (qemu_in_coroutine()) {
5345 /* Fast-path if already in coroutine context */
5346 bdrv_discard_co_entry(&rwco);
5347 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005348 AioContext *aio_context = bdrv_get_aio_context(bs);
5349
Paolo Bonzini4265d622011-10-17 12:32:14 +02005350 co = qemu_coroutine_create(bdrv_discard_co_entry);
5351 qemu_coroutine_enter(co, &rwco);
5352 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005353 aio_poll(aio_context, true);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005354 }
5355 }
5356
5357 return rwco.ret;
5358}
5359
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005360/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005361/* removable device support */
5362
5363/**
5364 * Return TRUE if the media is present
5365 */
5366int bdrv_is_inserted(BlockDriverState *bs)
5367{
5368 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005369
bellard19cb3732006-08-19 11:45:59 +00005370 if (!drv)
5371 return 0;
5372 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005373 return 1;
5374 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005375}
5376
5377/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005378 * Return whether the media changed since the last call to this
5379 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005380 */
5381int bdrv_media_changed(BlockDriverState *bs)
5382{
5383 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005384
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005385 if (drv && drv->bdrv_media_changed) {
5386 return drv->bdrv_media_changed(bs);
5387 }
5388 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005389}
5390
/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray.
 *
 * After invoking the driver hook (if any), emits a DEVICE_TRAY_MOVED QMP
 * event for nodes that have a non-empty device name.
 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriver *drv = bs->drv;
    const char *device_name;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }

    device_name = bdrv_get_device_name(bs);
    if (device_name[0] != '\0') {
        qapi_event_send_device_tray_moved(device_name,
                                          eject_flag, &error_abort);
    }
}
5409
bellard19cb3732006-08-19 11:45:59 +00005410/**
5411 * Lock or unlock the media (if it is locked, the user won't be able
5412 * to eject it manually).
5413 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005414void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005415{
5416 BlockDriver *drv = bs->drv;
5417
Markus Armbruster025e8492011-09-06 18:58:47 +02005418 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005419
Markus Armbruster025e8492011-09-06 18:58:47 +02005420 if (drv && drv->bdrv_lock_medium) {
5421 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005422 }
5423}
ths985a03b2007-12-24 16:10:43 +00005424
5425/* needed for generic scsi interface */
5426
5427int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5428{
5429 BlockDriver *drv = bs->drv;
5430
5431 if (drv && drv->bdrv_ioctl)
5432 return drv->bdrv_ioctl(bs, req, buf);
5433 return -ENOTSUP;
5434}
aliguori7d780662009-03-12 19:57:08 +00005435
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005436BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
aliguori221f7152009-03-28 17:28:41 +00005437 unsigned long int req, void *buf,
Markus Armbruster097310b2014-10-07 13:59:15 +02005438 BlockCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005439{
aliguori221f7152009-03-28 17:28:41 +00005440 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005441
aliguori221f7152009-03-28 17:28:41 +00005442 if (drv && drv->bdrv_aio_ioctl)
5443 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5444 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005445}
aliguorie268ca52009-04-22 20:20:00 +00005446
/* Record the guest-visible block size so request alignment can match it. */
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
    bs->guest_block_size = align;
}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005451
/* Allocate @size bytes aligned for I/O on @bs; aborts on OOM (like
 * qemu_memalign). */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}

/* Like qemu_blockalign(), but the returned buffer is zero-filled. */
void *qemu_blockalign0(BlockDriverState *bs, size_t size)
{
    return memset(qemu_blockalign(bs, size), 0, size);
}
5461
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005462void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5463{
5464 size_t align = bdrv_opt_mem_align(bs);
5465
5466 /* Ensure that NULL is never returned on success */
5467 assert(align > 0);
5468 if (size == 0) {
5469 size = align;
5470 }
5471
5472 return qemu_try_memalign(align, size);
5473}
5474
Max Reitz9ebd8442014-10-22 14:09:27 +02005475void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
5476{
5477 void *mem = qemu_try_blockalign(bs, size);
5478
5479 if (mem) {
5480 memset(mem, 0, size);
5481 }
5482
5483 return mem;
5484}
5485
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005486/*
5487 * Check if all memory in this vector is sector aligned.
5488 */
5489bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5490{
5491 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005492 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005493
5494 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005495 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005496 return false;
5497 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005498 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005499 return false;
5500 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005501 }
5502
5503 return true;
5504}
5505
Fam Zheng0db6e542015-04-17 19:49:50 -04005506BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
5507{
5508 BdrvDirtyBitmap *bm;
5509
5510 assert(name);
5511 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5512 if (bm->name && !strcmp(name, bm->name)) {
5513 return bm;
5514 }
5515 }
5516 return NULL;
5517}
5518
/* Strip the bitmap's name, turning it into an anonymous bitmap that can no
 * longer be found via bdrv_find_dirty_bitmap(). */
void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    g_free(bitmap->name);
    bitmap->name = NULL;
}
5524
/*
 * Create a dirty bitmap on @bs covering the whole device.
 *
 * @granularity: bytes per bitmap bit; must be a power of two and at least
 *               BDRV_SECTOR_SIZE (asserted below via sector_granularity).
 * @name:        may be NULL for an anonymous bitmap; duplicate names fail.
 *
 * Returns the new bitmap, or NULL with @errp set on failure.
 */
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
                                          uint32_t granularity,
                                          const char *name,
                                          Error **errp)
{
    int64_t bitmap_size;
    BdrvDirtyBitmap *bitmap;
    uint32_t sector_granularity;

    /* granularity must be a power of two */
    assert((granularity & (granularity - 1)) == 0);

    if (name && bdrv_find_dirty_bitmap(bs, name)) {
        error_setg(errp, "Bitmap already exists: %s", name);
        return NULL;
    }
    sector_granularity = granularity >> BDRV_SECTOR_BITS;
    assert(sector_granularity);
    bitmap_size = bdrv_nb_sectors(bs);
    if (bitmap_size < 0) {
        error_setg_errno(errp, -bitmap_size, "could not get length of device");
        /* NOTE(review): errno is also set here, apparently for callers that
         * inspect it in addition to errp — confirm before removing. */
        errno = -bitmap_size;
        return NULL;
    }
    bitmap = g_new0(BdrvDirtyBitmap, 1);
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
    bitmap->name = g_strdup(name);
    bitmap->disabled = false;
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
    return bitmap;
}
5555
/* Return true if the bitmap currently records writes (not disabled). */
bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
{
    return !bitmap->disabled;
}
5560
Fam Zhenge4654d22013-11-13 18:29:43 +08005561void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5562{
5563 BdrvDirtyBitmap *bm, *next;
5564 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5565 if (bm == bitmap) {
5566 QLIST_REMOVE(bitmap, list);
5567 hbitmap_free(bitmap->bitmap);
Fam Zheng0db6e542015-04-17 19:49:50 -04005568 g_free(bitmap->name);
Fam Zhenge4654d22013-11-13 18:29:43 +08005569 g_free(bitmap);
5570 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005571 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005572 }
5573}
5574
/* Stop the bitmap from recording writes (see bdrv_set_dirty()). */
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    bitmap->disabled = true;
}

/* Resume recording writes into the bitmap. */
void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
    bitmap->disabled = false;
}
5584
/*
 * Build a QAPI BlockDirtyInfoList describing every dirty bitmap on @bs,
 * preserving list order.  Caller owns the returned list.
 */
BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
{
    BdrvDirtyBitmap *bm;
    BlockDirtyInfoList *list = NULL;
    /* Tail pointer: appending via *plist keeps the output in QLIST order. */
    BlockDirtyInfoList **plist = &list;

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
        info->count = bdrv_get_dirty_count(bs, bm);
        info->granularity = bdrv_dirty_bitmap_granularity(bm);
        info->has_name = !!bm->name;
        info->name = g_strdup(bm->name);
        entry->value = info;
        *plist = entry;
        plist = &entry->next;
    }

    return list;
}
5605
Fam Zhenge4654d22013-11-13 18:29:43 +08005606int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005607{
Fam Zhenge4654d22013-11-13 18:29:43 +08005608 if (bitmap) {
5609 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005610 } else {
5611 return 0;
5612 }
5613}
5614
John Snow341ebc22015-04-17 19:49:52 -04005615/**
5616 * Chooses a default granularity based on the existing cluster size,
5617 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
5618 * is no cluster size information available.
5619 */
5620uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
5621{
5622 BlockDriverInfo bdi;
5623 uint32_t granularity;
5624
5625 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
5626 granularity = MAX(4096, bdi.cluster_size);
5627 granularity = MIN(65536, granularity);
5628 } else {
5629 granularity = 65536;
5630 }
5631
5632 return granularity;
5633}
5634
/* Return the bitmap's granularity in bytes per bit. */
uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
{
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
}
5639
/* Initialize @hbi to iterate @bitmap's dirty sectors from the start. */
void bdrv_dirty_iter_init(BlockDriverState *bs,
                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}
5645
/* Mark @nr_sectors from @cur_sector dirty in one specific bitmap.
 * The bitmap must be enabled (asserted). */
void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
                           int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}

/* Clear @nr_sectors from @cur_sector in one specific bitmap.
 * The bitmap must be enabled (asserted). */
void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
                             int64_t cur_sector, int nr_sectors)
{
    assert(bdrv_dirty_bitmap_enabled(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
}
5659
5660static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5661 int nr_sectors)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005662{
Fam Zhenge4654d22013-11-13 18:29:43 +08005663 BdrvDirtyBitmap *bitmap;
5664 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
John Snowb8e6fb72015-04-17 19:49:56 -04005665 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5666 continue;
5667 }
Fam Zhenge4654d22013-11-13 18:29:43 +08005668 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005669 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005670}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005671
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005672static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
5673 int nr_sectors)
Fam Zhenge4654d22013-11-13 18:29:43 +08005674{
5675 BdrvDirtyBitmap *bitmap;
5676 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
John Snowb8e6fb72015-04-17 19:49:56 -04005677 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
5678 continue;
5679 }
Fam Zhenge4654d22013-11-13 18:29:43 +08005680 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5681 }
5682}
5683
/* Return the number of dirty sectors tracked by @bitmap. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
5688
/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
5694
5695/* Release a previously grabbed reference to bs.
5696 * If after releasing, reference count is zero, the BlockDriverState is
5697 * deleted. */
5698void bdrv_unref(BlockDriverState *bs)
5699{
Jeff Cody9a4d5ca2014-07-23 17:22:57 -04005700 if (!bs) {
5701 return;
5702 }
Fam Zheng9fcb0252013-08-23 09:14:46 +08005703 assert(bs->refcnt > 0);
5704 if (--bs->refcnt == 0) {
5705 bdrv_delete(bs);
5706 }
5707}
5708
/* One entry in a node's per-operation blocker list: the Error explains to
 * users why the operation is currently forbidden. */
struct BdrvOpBlocker {
    Error *reason;
    QLIST_ENTRY(BdrvOpBlocker) list;
};
5713
/*
 * Return true if operation @op is currently blocked on @bs.  When blocked
 * and @errp is non-NULL, report the first blocker's reason through it.
 */
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
        blocker = QLIST_FIRST(&bs->op_blockers[op]);
        if (errp) {
            error_setg(errp, "Node '%s' is busy: %s",
                       bdrv_get_device_or_node_name(bs),
                       error_get_pretty(blocker->reason));
        }
        return true;
    }
    return false;
}
5729
5730void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5731{
5732 BdrvOpBlocker *blocker;
5733 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5734
Markus Armbruster5839e532014-08-19 10:31:08 +02005735 blocker = g_new0(BdrvOpBlocker, 1);
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005736 blocker->reason = reason;
5737 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5738}
5739
5740void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5741{
5742 BdrvOpBlocker *blocker, *next;
5743 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5744 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5745 if (blocker->reason == reason) {
5746 QLIST_REMOVE(blocker, list);
5747 g_free(blocker);
5748 }
5749 }
5750}
5751
5752void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5753{
5754 int i;
5755 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5756 bdrv_op_block(bs, i, reason);
5757 }
5758}
5759
5760void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5761{
5762 int i;
5763 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5764 bdrv_op_unblock(bs, i, reason);
5765 }
5766}
5767
5768bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5769{
5770 int i;
5771
5772 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5773 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5774 return false;
5775 }
5776 }
5777 return true;
5778}
5779
/* Enable I/O status tracking and reset it to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
            (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
             bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
             bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}

/* Turn off I/O status tracking. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}

/* Reset the I/O status to OK (if tracking is enabled), and also reset the
 * iostatus of any block job running on this device. */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}

/* Latch an error into the I/O status: ENOSPC maps to NOSPACE, everything
 * else to FAILED.  Only the first error is recorded until a reset. */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    assert(bdrv_iostatus_is_enabled(bs));
    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
5819
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005820void bdrv_img_create(const char *filename, const char *fmt,
5821 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005822 char *options, uint64_t img_size, int flags,
5823 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005824{
Chunyan Liu83d05212014-06-05 17:20:51 +08005825 QemuOptsList *create_opts = NULL;
5826 QemuOpts *opts = NULL;
5827 const char *backing_fmt, *backing_file;
5828 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005829 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005830 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005831 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005832 int ret = 0;
5833
5834 /* Find driver and parse its options */
5835 drv = bdrv_find_format(fmt);
5836 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005837 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005838 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005839 }
5840
Max Reitzb65a5e12015-02-05 13:58:12 -05005841 proto_drv = bdrv_find_protocol(filename, true, errp);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005842 if (!proto_drv) {
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005843 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005844 }
5845
Max Reitzc6149722014-12-02 18:32:45 +01005846 if (!drv->create_opts) {
5847 error_setg(errp, "Format driver '%s' does not support image creation",
5848 drv->format_name);
5849 return;
5850 }
5851
5852 if (!proto_drv->create_opts) {
5853 error_setg(errp, "Protocol driver '%s' does not support image creation",
5854 proto_drv->format_name);
5855 return;
5856 }
5857
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005858 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5859 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005860
5861 /* Create parameter list with default values */
Chunyan Liu83d05212014-06-05 17:20:51 +08005862 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01005863 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005864
5865 /* Parse -o options */
5866 if (options) {
Markus Armbrusterdc523cd342015-02-12 18:37:11 +01005867 qemu_opts_do_parse(opts, options, NULL, &local_err);
5868 if (local_err) {
5869 error_report_err(local_err);
5870 local_err = NULL;
Chunyan Liu83d05212014-06-05 17:20:51 +08005871 error_setg(errp, "Invalid options for file format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005872 goto out;
5873 }
5874 }
5875
5876 if (base_filename) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005877 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005878 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005879 error_setg(errp, "Backing file not supported for file format '%s'",
5880 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005881 goto out;
5882 }
5883 }
5884
5885 if (base_fmt) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005886 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005887 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005888 error_setg(errp, "Backing file format not supported for file "
5889 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005890 goto out;
5891 }
5892 }
5893
Chunyan Liu83d05212014-06-05 17:20:51 +08005894 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5895 if (backing_file) {
5896 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005897 error_setg(errp, "Error: Trying to create an image with the "
5898 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005899 goto out;
5900 }
5901 }
5902
Chunyan Liu83d05212014-06-05 17:20:51 +08005903 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5904 if (backing_fmt) {
5905 backing_drv = bdrv_find_format(backing_fmt);
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005906 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005907 error_setg(errp, "Unknown backing file format '%s'",
Chunyan Liu83d05212014-06-05 17:20:51 +08005908 backing_fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005909 goto out;
5910 }
5911 }
5912
5913 // The size for the image must always be specified, with one exception:
5914 // If we are using a backing file, we can obtain the size from there
Chunyan Liu83d05212014-06-05 17:20:51 +08005915 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5916 if (size == -1) {
5917 if (backing_file) {
Max Reitz66f6b812013-12-03 14:57:52 +01005918 BlockDriverState *bs;
Max Reitz29168012014-11-26 17:20:27 +01005919 char *full_backing = g_new0(char, PATH_MAX);
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005920 int64_t size;
Paolo Bonzini63090da2012-04-12 14:01:03 +02005921 int back_flags;
5922
Max Reitz29168012014-11-26 17:20:27 +01005923 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
5924 full_backing, PATH_MAX,
5925 &local_err);
5926 if (local_err) {
5927 g_free(full_backing);
5928 goto out;
5929 }
5930
Paolo Bonzini63090da2012-04-12 14:01:03 +02005931 /* backing files always opened read-only */
5932 back_flags =
5933 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005934
Max Reitzf67503e2014-02-18 18:33:05 +01005935 bs = NULL;
Max Reitz29168012014-11-26 17:20:27 +01005936 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005937 backing_drv, &local_err);
Max Reitz29168012014-11-26 17:20:27 +01005938 g_free(full_backing);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005939 if (ret < 0) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005940 goto out;
5941 }
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005942 size = bdrv_getlength(bs);
5943 if (size < 0) {
5944 error_setg_errno(errp, -size, "Could not get size of '%s'",
5945 backing_file);
5946 bdrv_unref(bs);
5947 goto out;
5948 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005949
Markus Armbruster39101f22015-02-12 16:46:36 +01005950 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
Max Reitz66f6b812013-12-03 14:57:52 +01005951
5952 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005953 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005954 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005955 goto out;
5956 }
5957 }
5958
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005959 if (!quiet) {
Fam Zheng43c5d8f2014-12-09 15:38:04 +08005960 printf("Formatting '%s', fmt=%s", filename, fmt);
5961 qemu_opts_print(opts, " ");
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005962 puts("");
5963 }
Chunyan Liu83d05212014-06-05 17:20:51 +08005964
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005965 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08005966
Max Reitzcc84d902013-09-06 17:14:26 +02005967 if (ret == -EFBIG) {
5968 /* This is generally a better message than whatever the driver would
5969 * deliver (especially because of the cluster_size_hint), since that
5970 * is most probably not much different from "image too large". */
5971 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08005972 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02005973 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005974 }
Max Reitzcc84d902013-09-06 17:14:26 +02005975 error_setg(errp, "The image size is too large for file format '%s'"
5976 "%s", fmt, cluster_size_hint);
5977 error_free(local_err);
5978 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005979 }
5980
5981out:
Chunyan Liu83d05212014-06-05 17:20:51 +08005982 qemu_opts_del(opts);
5983 qemu_opts_free(create_opts);
Markus Armbruster84d18f02014-01-30 15:07:28 +01005984 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005985 error_propagate(errp, local_err);
5986 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005987}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005988
5989AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5990{
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005991 return bs->aio_context;
5992}
5993
/* Detach @bs and its whole tree (bs->file, bs->backing_hd) from the current
 * AioContext.
 *
 * Registered AioContext notifiers, the throttling timers, and the driver are
 * each given a chance to drop per-context state; afterwards bs->aio_context
 * is NULL until bdrv_attach_aio_context() is called.  Note the teardown order
 * here is the exact reverse of the setup order in bdrv_attach_aio_context().
 */
void bdrv_detach_aio_context(BlockDriverState *bs)
{
    BdrvAioNotifier *baf;

    /* A BDS without a driver has nothing attached to detach */
    if (!bs->drv) {
        return;
    }

    /* Notify external listeners first, while the context is still in place */
    QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
        baf->detach_aio_context(baf->opaque);
    }

    /* Throttling timers are bound to the old context */
    if (bs->io_limits_enabled) {
        throttle_detach_aio_context(&bs->throttle_state);
    }
    /* Driver-specific per-context state (optional callback) */
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    /* Recurse into the children */
    if (bs->file) {
        bdrv_detach_aio_context(bs->file);
    }
    if (bs->backing_hd) {
        bdrv_detach_aio_context(bs->backing_hd);
    }

    bs->aio_context = NULL;
}
6021
/* Attach @bs and its whole tree (bs->backing_hd, bs->file) to @new_context.
 *
 * Sets bs->aio_context, then re-establishes per-context state in the exact
 * reverse order of bdrv_detach_aio_context(): children first, then the
 * driver, throttling, and finally the registered AioContext notifiers.
 */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    BdrvAioNotifier *ban;

    /* A BDS without a driver has nothing to attach */
    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    /* Children first, mirroring the detach order */
    if (bs->backing_hd) {
        bdrv_attach_aio_context(bs->backing_hd, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file, new_context);
    }
    /* Driver-specific per-context state (optional callback) */
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    /* Rebind throttling timers to the new context */
    if (bs->io_limits_enabled) {
        throttle_attach_aio_context(&bs->throttle_state, new_context);
    }

    /* External listeners last, once the tree is fully attached */
    QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
        ban->attached_aio_context(new_context, ban->opaque);
    }
}
6050
/* Move @bs (and its whole tree) from its current AioContext to @new_context.
 *
 * All in-flight requests are drained first; the new context is acquired
 * around the attach because this function runs in the old context's thread.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02006064
Max Reitz33384422014-06-20 21:57:33 +02006065void bdrv_add_aio_context_notifier(BlockDriverState *bs,
6066 void (*attached_aio_context)(AioContext *new_context, void *opaque),
6067 void (*detach_aio_context)(void *opaque), void *opaque)
6068{
6069 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
6070 *ban = (BdrvAioNotifier){
6071 .attached_aio_context = attached_aio_context,
6072 .detach_aio_context = detach_aio_context,
6073 .opaque = opaque
6074 };
6075
6076 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
6077}
6078
6079void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
6080 void (*attached_aio_context)(AioContext *,
6081 void *),
6082 void (*detach_aio_context)(void *),
6083 void *opaque)
6084{
6085 BdrvAioNotifier *ban, *ban_next;
6086
6087 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
6088 if (ban->attached_aio_context == attached_aio_context &&
6089 ban->detach_aio_context == detach_aio_context &&
6090 ban->opaque == opaque)
6091 {
6092 QLIST_REMOVE(ban, list);
6093 g_free(ban);
6094
6095 return;
6096 }
6097 }
6098
6099 abort();
6100}
6101
Stefan Hajnoczid616b222013-06-24 17:13:10 +02006102void bdrv_add_before_write_notifier(BlockDriverState *bs,
6103 NotifierWithReturn *notifier)
6104{
6105 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
6106}
Max Reitz6f176b42013-09-03 10:09:50 +02006107
Max Reitz77485432014-10-27 11:12:50 +01006108int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
6109 BlockDriverAmendStatusCB *status_cb)
Max Reitz6f176b42013-09-03 10:09:50 +02006110{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08006111 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02006112 return -ENOTSUP;
6113 }
Max Reitz77485432014-10-27 11:12:50 +01006114 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
Max Reitz6f176b42013-09-03 10:09:50 +02006115}
Benoît Canetf6186f42013-10-02 14:33:48 +02006116
Benoît Canetb5042a32014-03-03 19:11:34 +01006117/* This function will be called by the bdrv_recurse_is_first_non_filter method
6118 * of block filter and by bdrv_is_first_non_filter.
6119 * It is used to test if the given bs is the candidate or recurse more in the
6120 * node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01006121 */
Benoît Canet212a5a82014-01-23 21:31:36 +01006122bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
6123 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02006124{
Benoît Canetb5042a32014-03-03 19:11:34 +01006125 /* return false if basic checks fails */
6126 if (!bs || !bs->drv) {
6127 return false;
6128 }
6129
6130 /* the code reached a non block filter driver -> check if the bs is
6131 * the same as the candidate. It's the recursion termination condition.
6132 */
6133 if (!bs->drv->is_filter) {
6134 return bs == candidate;
6135 }
6136 /* Down this path the driver is a block filter driver */
6137
6138 /* If the block filter recursion method is defined use it to recurse down
6139 * the node graph.
6140 */
6141 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01006142 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
6143 }
6144
Benoît Canetb5042a32014-03-03 19:11:34 +01006145 /* the driver is a block filter but don't allow to recurse -> return false
6146 */
6147 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01006148}
6149
6150/* This function checks if the candidate is the first non filter bs down it's
6151 * bs chain. Since we don't have pointers to parents it explore all bs chains
6152 * from the top. Some filters can choose not to pass down the recursion.
6153 */
6154bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6155{
6156 BlockDriverState *bs;
6157
6158 /* walk down the bs forest recursively */
6159 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6160 bool perm;
6161
Benoît Canetb5042a32014-03-03 19:11:34 +01006162 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01006163 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01006164
6165 /* candidate is the first non filter */
6166 if (perm) {
6167 return true;
6168 }
6169 }
6170
6171 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02006172}
Benoît Canet09158f02014-06-27 18:25:25 +02006173
/* Look up @node_name and validate that the node may be replaced.
 *
 * Returns the BlockDriverState on success, or NULL with @errp set when the
 * node does not exist, is blocked for BLOCK_OP_TYPE_REPLACE, or is not the
 * top-most non-filter of its chain.  The node's AioContext is held only for
 * the duration of the checks and is released again before returning.
 */
BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
{
    BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
    AioContext *aio_context;

    if (!to_replace_bs) {
        error_setg(errp, "Node name '%s' not found", node_name);
        return NULL;
    }

    aio_context = bdrv_get_aio_context(to_replace_bs);
    aio_context_acquire(aio_context);

    /* bdrv_op_is_blocked() fills in errp itself on failure */
    if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
        to_replace_bs = NULL;
        goto out;
    }

    /* We don't want arbitrary node of the BDS chain to be replaced only the top
     * most non filter in order to prevent data corruption.
     * Another benefit is that this tests exclude backing files which are
     * blocked by the backing blockers.
     */
    if (!bdrv_is_first_non_filter(to_replace_bs)) {
        error_setg(errp, "Only top most non filter can be replaced");
        to_replace_bs = NULL;
        goto out;
    }

out:
    aio_context_release(aio_context);
    return to_replace_bs;
}
Ming Lei448ad912014-07-04 18:04:33 +08006207
6208void bdrv_io_plug(BlockDriverState *bs)
6209{
6210 BlockDriver *drv = bs->drv;
6211 if (drv && drv->bdrv_io_plug) {
6212 drv->bdrv_io_plug(bs);
6213 } else if (bs->file) {
6214 bdrv_io_plug(bs->file);
6215 }
6216}
6217
6218void bdrv_io_unplug(BlockDriverState *bs)
6219{
6220 BlockDriver *drv = bs->drv;
6221 if (drv && drv->bdrv_io_unplug) {
6222 drv->bdrv_io_unplug(bs);
6223 } else if (bs->file) {
6224 bdrv_io_unplug(bs->file);
6225 }
6226}
6227
6228void bdrv_flush_io_queue(BlockDriverState *bs)
6229{
6230 BlockDriver *drv = bs->drv;
6231 if (drv && drv->bdrv_flush_io_queue) {
6232 drv->bdrv_flush_io_queue(bs);
6233 } else if (bs->file) {
6234 bdrv_flush_io_queue(bs->file);
6235 }
6236}
Max Reitz91af7012014-07-18 20:24:56 +02006237
6238static bool append_open_options(QDict *d, BlockDriverState *bs)
6239{
6240 const QDictEntry *entry;
6241 bool found_any = false;
6242
6243 for (entry = qdict_first(bs->options); entry;
6244 entry = qdict_next(bs->options, entry))
6245 {
6246 /* Only take options for this level and exclude all non-driver-specific
6247 * options */
6248 if (!strchr(qdict_entry_key(entry), '.') &&
6249 strcmp(qdict_entry_key(entry), "node-name"))
6250 {
6251 qobject_incref(qdict_entry_value(entry));
6252 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
6253 found_any = true;
6254 }
6255 }
6256
6257 return found_any;
6258}
6259
/* Updates the following BDS fields:
 *  - exact_filename: A filename which may be used for opening a block device
 *                    which (mostly) equals the given BDS (even without any
 *                    other options; so reading and writing must return the same
 *                    results, but caching etc. may be different)
 *  - full_open_options: Options which, when given when opening a block device
 *                       (without a filename), result in a BDS (mostly)
 *                       equalling the given one
 *  - filename: If exact_filename is set, it is copied here. Otherwise,
 *              full_open_options is converted to a JSON object, prefixed with
 *              "json:" (for use through the JSON pseudo protocol) and put here.
 */
void bdrv_refresh_filename(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    QDict *opts;

    /* Nothing to refresh on a driverless BDS */
    if (!drv) {
        return;
    }

    /* This BDS's file name will most probably depend on its file's name, so
     * refresh that first */
    if (bs->file) {
        bdrv_refresh_filename(bs->file);
    }

    if (drv->bdrv_refresh_filename) {
        /* Obsolete information is of no use here, so drop the old file name
         * information before refreshing it */
        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        /* The driver fills in exact_filename/full_open_options itself */
        drv->bdrv_refresh_filename(bs);
    } else if (bs->file) {
        /* Try to reconstruct valid information from the underlying file */
        bool has_open_options;

        bs->exact_filename[0] = '\0';
        if (bs->full_open_options) {
            QDECREF(bs->full_open_options);
            bs->full_open_options = NULL;
        }

        opts = qdict_new();
        has_open_options = append_open_options(opts, bs);

        /* If no specific options have been given for this BDS, the filename of
         * the underlying file should suffice for this one as well */
        if (bs->file->exact_filename[0] && !has_open_options) {
            strcpy(bs->exact_filename, bs->file->exact_filename);
        }
        /* Reconstructing the full options QDict is simple for most format block
         * drivers, as long as the full options are known for the underlying
         * file BDS. The full options QDict of that file BDS should somehow
         * contain a representation of the filename, therefore the following
         * suffices without querying the (exact_)filename of this BDS. */
        if (bs->file->full_open_options) {
            qdict_put_obj(opts, "driver",
                          QOBJECT(qstring_from_str(drv->format_name)));
            /* "file" takes a reference on the child's options */
            QINCREF(bs->file->full_open_options);
            qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));

            bs->full_open_options = opts;
        } else {
            /* Could not build a usable dict; drop it again */
            QDECREF(opts);
        }
    } else if (!bs->full_open_options && qdict_size(bs->options)) {
        /* There is no underlying file BDS (at least referenced by BDS.file),
         * so the full options QDict should be equal to the options given
         * specifically for this block device when it was opened (plus the
         * driver specification).
         * Because those options don't change, there is no need to update
         * full_open_options when it's already set. */

        opts = qdict_new();
        append_open_options(opts, bs);
        qdict_put_obj(opts, "driver",
                      QOBJECT(qstring_from_str(drv->format_name)));

        if (bs->exact_filename[0]) {
            /* This may not work for all block protocol drivers (some may
             * require this filename to be parsed), but we have to find some
             * default solution here, so just include it. If some block driver
             * does not support pure options without any filename at all or
             * needs some special format of the options QDict, it needs to
             * implement the driver-specific bdrv_refresh_filename() function.
             */
            qdict_put_obj(opts, "filename",
                          QOBJECT(qstring_from_str(bs->exact_filename)));
        }

        bs->full_open_options = opts;
    }

    /* Finally derive bs->filename from whichever representation we have */
    if (bs->exact_filename[0]) {
        pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
    } else if (bs->full_open_options) {
        QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
        snprintf(bs->filename, sizeof(bs->filename), "json:%s",
                 qstring_get_str(json));
        QDECREF(json);
    }
}
Benoît Canet5366d0c2014-09-05 15:46:18 +02006367
6368/* This accessor function purpose is to allow the device models to access the
6369 * BlockAcctStats structure embedded inside a BlockDriverState without being
6370 * aware of the BlockDriverState structure layout.
6371 * It will go away when the BlockAcctStats structure will be moved inside
6372 * the device models.
6373 */
6374BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6375{
6376 return &bs->stats;
6377}