/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/block-backend.h"
#include "sysemu/sysemu.h"
#include "sysemu/qtest.h"
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

struct BdrvDirtyBitmap {
    HBitmap *bitmap;
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque);
static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BdrvRequestFlags flags,
                                         BlockCompletionFunc *cb,
                                         void *opaque,
                                         bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                           int nr_sectors);
static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                             int nr_sectors);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/* this function drains all the throttled I/Os */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}

static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}

/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    int clock_type = QEMU_CLOCK_REALTIME;

    if (qtest_enabled()) {
        /* For testing block IO throttling only */
        clock_type = QEMU_CLOCK_VIRTUAL;
    }
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  clock_type,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}
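
/* Illustrative usage sketch (not part of the original file): a caller that
 * wants to cap a device at roughly 10 MB/s would enable limits first and
 * then apply a ThrottleConfig, e.g.
 *
 *     ThrottleConfig cfg;
 *     memset(&cfg, 0, sizeof(cfg));
 *     cfg.buckets[THROTTLE_BPS_TOTAL].avg = 10 * 1024 * 1024;
 *     bdrv_io_limits_enable(bs);
 *     bdrv_set_io_limits(bs, &cfg);
 *
 * The ThrottleConfig member names above are taken from qemu/throttle.h as an
 * assumption; throttle_config() is the authoritative consumer of that struct.
 */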

/* This function makes an I/O request wait if needed
 *
 * @bytes:    the number of bytes of the I/O
 * @is_write: is the I/O a write
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* does this I/O need to wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already being
     * throttled, queue the I/O */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the IO will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);


    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}

size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}

/* check if the path starts with "<protocol>:" */
int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}
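
/* Examples (illustrative): path_has_protocol("nbd://localhost/disk") returns
 * non-zero because a ':' appears before any path separator, while
 * path_has_protocol("/var/lib/images/disk.qcow2") returns 0. On Windows,
 * "d:\images\disk.raw" is treated as a drive path, not a protocol, and also
 * returns 0. */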

int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by treating it as relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
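
/* Example (illustrative): combining base_path "/images/base.qcow2" with the
 * relative filename "backing.qcow2" yields "/images/backing.qcow2"; an
 * absolute filename such as "/other/backing.qcow2" is copied unchanged.
 * The strchr(':') step keeps a protocol prefix intact, so "nbd:dir/base"
 * combined with "other" becomes "nbd:dir/other". */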

void bdrv_get_full_backing_filename_from_filename(const char *backed,
                                                  const char *backing,
                                                  char *dest, size_t sz,
                                                  Error **errp)
{
    if (backing[0] == '\0' || path_has_protocol(backing) ||
        path_is_absolute(backing))
    {
        pstrcpy(dest, sz, backing);
    } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
        error_setg(errp, "Cannot use relative backing file names for '%s'",
                   backed);
    } else {
        path_combine(dest, sz, backed, backing);
    }
}

void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
                                    Error **errp)
{
    char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;

    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
                                                 dest, sz, errp);
}

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

BlockDriverState *bdrv_new_root(void)
{
    BlockDriverState *bs = bdrv_new();

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}

BlockDriverState *bdrv_new(void)
{
    BlockDriverState *bs;
    int i;

    bs = g_new0(BlockDriverState, 1);
    QLIST_INIT(&bs->dirty_bitmaps);
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();

    return bs;
}

void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1; /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QemuOpts *opts;
    int ret;
    Error *err;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}

int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}
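
/* Illustrative sketch (an assumption about a typical caller, not part of the
 * original file): creating a small qcow2 image amounts to looking up the
 * driver with bdrv_find_format("qcow2"), building a QemuOpts from
 * drv->create_opts with BLOCK_OPT_SIZE set, and then calling
 * bdrv_create(drv, "test.qcow2", opts, errp). The coroutine detour above
 * exists because .bdrv_create implementations may issue block I/O that has to
 * run in coroutine context. */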

int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true, errp);
    if (drv == NULL) {
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.max_transfer_length =
            MIN_NON_ZERO(bs->bl.max_transfer_length,
                         bs->backing_hd->bl.max_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}

/**
 * Try to get @bs's logical and physical block size.
 * On success, store them in @bsz struct and return 0.
 * On failure return -errno.
 * @bs must not be empty.
 */
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_probe_blocksizes) {
        return drv->bdrv_probe_blocksizes(bs, bsz);
    }

    return -ENOTSUP;
}

/**
 * Try to get @bs's geometry (cyls, heads, sectors).
 * On success, store them in @geo struct and return 0.
 * On failure return -errno.
 * @bs must not be empty.
 */
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_probe_geometry) {
        return drv->bdrv_probe_geometry(bs, geo);
    }

    return -ENOTSUP;
}

/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}
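
/* Example (illustrative): on a POSIX host with TMPDIR unset, a successful
 * call fills @filename with something like "/var/tmp/vl.Ab12Cd" (mkstemp()
 * replaces the XXXXXX template) and leaves a zero-length file behind as a
 * name reservation; callers typically create or overwrite that file
 * themselves afterwards. */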

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix,
                                Error **errp)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return &bdrv_file;
    }

    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }

    error_setg(errp, "Unknown protocol '%s'", protocol);
    return NULL;
}
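
/* Examples (illustrative): "nbd:localhost:10809" resolves to the driver whose
 * protocol_name is "nbd"; a plain path such as "disk.img" (or any filename
 * when allow_protocol_prefix is false) resolves to &bdrv_file; and host
 * devices like "/dev/cdrom" are typically claimed first by the
 * find_hdev_driver() probe above. */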

/*
 * Guess image format by probing its contents.
 * This is not a good idea when your image is raw (CVE-2008-2004), but
 * we do it anyway for backward compatibility.
 *
 * @buf contains the image's first @buf_size bytes.
 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
 * but can be smaller if the image file is smaller)
 * @filename is its filename.
 *
 * For all block drivers, call the bdrv_probe() method to get its
 * probing score.
 * Return the first block driver with the highest probing score.
 */
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
                            const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe) {
            score = d->bdrv_probe(buf, buf_size, filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    BlockDriver *drv;
    uint8_t buf[BLOCK_PROBE_BUF_SIZE];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        *pdrv = &bdrv_raw;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    drv = bdrv_probe_all(buf, ret, filename);
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 * Return 0 on success, -errno on error.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
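
/* Summary of the cache mode mapping above (derived from the code):
 *   "writethrough"  -> no flags (the default)
 *   "writeback"     -> BDRV_O_CACHE_WB
 *   "none" / "off"  -> BDRV_O_NOCACHE | BDRV_O_CACHE_WB
 *   "directsync"    -> BDRV_O_NOCACHE
 *   "unsafe"        -> BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH
 */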

/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

/*
 * Returns the flags that a temporary snapshot should get, based on the
 * originally requested flags (the originally requested image will have flags
 * like a backing file)
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
}

/*
 * Returns the flags that bs->file should get, based on the given flags for
 * the parent BDS
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);

    return flags;
}

/*
 * Returns the flags that bs->backing_hd should get, based on the given flags
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);

    return flags;
}

static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* Check for empty string or invalid characters */
    if (!id_wellformed(node_name)) {
        error_setg(errp, "Invalid node name");
        return;
    }

    /* takes care of avoiding namespace collisions */
    if (blk_by_name(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}

/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                   ? "Driver '%s' can only be used for read-only devices"
                   : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}
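
/* Example (illustrative): a filename of
 *   "json:{\"driver\": \"qcow2\", \"file\": {\"driver\": \"file\",
 *          \"filename\": \"disk.qcow2\"}}"
 * is parsed and flattened into a QDict with the keys "driver=qcow2",
 * "file.driver=file" and "file.filename=disk.qcow2". */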

/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
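
/* Example (illustrative): opening the legacy filename "nbd://localhost/disk"
 * with BDRV_O_PROTOCOL set and an empty options QDict first yields the
 * options {"filename": "nbd://localhost/disk", "driver": "nbd"}; the final
 * bdrv_parse_filename() step may then replace "filename" with finer-grained
 * keys such as the server address (an assumption about the nbd driver's
 * filename parser). */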
1225
Fam Zheng8d24cce2014-05-23 21:29:45 +08001226void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1227{
1228
Fam Zheng826b6ca2014-05-23 21:29:47 +08001229 if (bs->backing_hd) {
1230 assert(bs->backing_blocker);
1231 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1232 } else if (backing_hd) {
1233 error_setg(&bs->backing_blocker,
1234 "device is used as backing hd of '%s'",
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001235 bdrv_get_device_name(bs));
Fam Zheng826b6ca2014-05-23 21:29:47 +08001236 }
1237
Fam Zheng8d24cce2014-05-23 21:29:45 +08001238 bs->backing_hd = backing_hd;
1239 if (!backing_hd) {
Fam Zheng826b6ca2014-05-23 21:29:47 +08001240 error_free(bs->backing_blocker);
1241 bs->backing_blocker = NULL;
Fam Zheng8d24cce2014-05-23 21:29:45 +08001242 goto out;
1243 }
1244 bs->open_flags &= ~BDRV_O_NO_BACKING;
1245 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1246 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1247 backing_hd->drv ? backing_hd->drv->format_name : "");
Fam Zheng826b6ca2014-05-23 21:29:47 +08001248
1249 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1250 /* Otherwise we won't be able to commit due to check in bdrv_commit */
Fam Zhengbb000212014-09-11 13:14:00 +08001251 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
Fam Zheng826b6ca2014-05-23 21:29:47 +08001252 bs->backing_blocker);
Fam Zheng8d24cce2014-05-23 21:29:45 +08001253out:
Kevin Wolf3baca892014-07-16 17:48:16 +02001254 bdrv_refresh_limits(bs, NULL);
Fam Zheng8d24cce2014-05-23 21:29:45 +08001255}
1256
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001257/*
1258 * Opens the backing file for a BlockDriverState if not yet open
1259 *
1260 * options is a QDict of options to pass to the block drivers, or NULL for an
1261 * empty set of options. The reference to the QDict is transferred to this
1262 * function (even on failure), so if the caller intends to reuse the dictionary,
1263 * it needs to use QINCREF() before calling bdrv_file_open.
1264 */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001265int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
Paolo Bonzini9156df12012-10-18 16:49:17 +02001266{
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001267 char *backing_filename = g_malloc0(PATH_MAX);
Kevin Wolf317fc442014-04-25 13:27:34 +02001268 int ret = 0;
Fam Zheng8d24cce2014-05-23 21:29:45 +08001269 BlockDriverState *backing_hd;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001270 Error *local_err = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001271
1272 if (bs->backing_hd != NULL) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001273 QDECREF(options);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001274 goto free_exit;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001275 }
1276
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001277 /* NULL means an empty set of options */
1278 if (options == NULL) {
1279 options = qdict_new();
1280 }
1281
Paolo Bonzini9156df12012-10-18 16:49:17 +02001282 bs->open_flags &= ~BDRV_O_NO_BACKING;
Kevin Wolf1cb6f502013-04-12 20:27:07 +02001283 if (qdict_haskey(options, "file.filename")) {
1284 backing_filename[0] = '\0';
1285 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001286 QDECREF(options);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001287 goto free_exit;
Fam Zhengdbecebd2013-09-22 20:05:06 +08001288 } else {
Max Reitz9f074292014-11-26 17:20:26 +01001289 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1290 &local_err);
1291 if (local_err) {
1292 ret = -EINVAL;
1293 error_propagate(errp, local_err);
1294 QDECREF(options);
1295 goto free_exit;
1296 }
Paolo Bonzini9156df12012-10-18 16:49:17 +02001297 }
1298
Kevin Wolf8ee79e72014-06-04 15:09:35 +02001299 if (!bs->drv || !bs->drv->supports_backing) {
1300 ret = -EINVAL;
1301 error_setg(errp, "Driver doesn't support backing files");
1302 QDECREF(options);
1303 goto free_exit;
1304 }
1305
Markus Armbrustere4e99862014-10-07 13:59:03 +02001306 backing_hd = bdrv_new();
Fam Zheng8d24cce2014-05-23 21:29:45 +08001307
Kevin Wolfc5f6e492014-11-25 18:12:42 +01001308 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1309 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
Paolo Bonzini9156df12012-10-18 16:49:17 +02001310 }
1311
Max Reitzf67503e2014-02-18 18:33:05 +01001312 assert(bs->backing_hd == NULL);
Fam Zheng8d24cce2014-05-23 21:29:45 +08001313 ret = bdrv_open(&backing_hd,
Max Reitzddf56362014-02-18 18:33:06 +01001314 *backing_filename ? backing_filename : NULL, NULL, options,
Kevin Wolfc5f6e492014-11-25 18:12:42 +01001315 bdrv_backing_flags(bs->open_flags), NULL, &local_err);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001316 if (ret < 0) {
Fam Zheng8d24cce2014-05-23 21:29:45 +08001317 bdrv_unref(backing_hd);
1318 backing_hd = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001319 bs->open_flags |= BDRV_O_NO_BACKING;
Fam Zhengb04b6b62013-11-08 11:26:49 +08001320 error_setg(errp, "Could not open backing file: %s",
1321 error_get_pretty(local_err));
1322 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001323 goto free_exit;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001324 }
Fam Zheng8d24cce2014-05-23 21:29:45 +08001325 bdrv_set_backing_hd(bs, backing_hd);
Peter Feinerd80ac652014-01-08 19:43:25 +00001326
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001327free_exit:
1328 g_free(backing_filename);
1329 return ret;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001330}
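/*
 * Illustrative sketch, not part of the original file: how a caller might
 * pass explicit backing-file options while keeping its own reference to
 * the dictionary.  The "driver"/"raw" pair is only an example value.
 */
static int example_open_backing_file(BlockDriverState *bs, Error **errp)
{
    QDict *opts = qdict_new();
    int ret;

    qdict_put(opts, "driver", qstring_from_str("raw"));

    /* bdrv_open_backing_file() consumes one QDict reference even on
     * failure, so take an extra one if 'opts' is still needed later. */
    QINCREF(opts);
    ret = bdrv_open_backing_file(bs, opts, errp);

    QDECREF(opts); /* drop the reference we kept for ourselves */
    return ret;
}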
1331
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001332/*
Max Reitzda557aa2013-12-20 19:28:11 +01001333 * Opens a disk image whose options are given as BlockdevRef in another block
1334 * device's options.
1335 *
Max Reitzda557aa2013-12-20 19:28:11 +01001336 * If allow_none is true, no image will be opened if filename is NULL and no
1337 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1338 *
 1339 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1340 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1341 * itself, all options starting with "${bdref_key}." are considered part of the
1342 * BlockdevRef.
1343 *
1344 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001345 *
1346 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001347 */
1348int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1349 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001350 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001351{
1352 QDict *image_options;
1353 int ret;
1354 char *bdref_key_dot;
1355 const char *reference;
1356
Max Reitzf67503e2014-02-18 18:33:05 +01001357 assert(pbs);
1358 assert(*pbs == NULL);
1359
Max Reitzda557aa2013-12-20 19:28:11 +01001360 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1361 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1362 g_free(bdref_key_dot);
1363
1364 reference = qdict_get_try_str(options, bdref_key);
1365 if (!filename && !reference && !qdict_size(image_options)) {
1366 if (allow_none) {
1367 ret = 0;
1368 } else {
1369 error_setg(errp, "A block device must be specified for \"%s\"",
1370 bdref_key);
1371 ret = -EINVAL;
1372 }
Markus Armbrusterb20e61e2014-05-28 11:16:57 +02001373 QDECREF(image_options);
Max Reitzda557aa2013-12-20 19:28:11 +01001374 goto done;
1375 }
1376
Max Reitzf7d9fd82014-02-18 18:33:12 +01001377 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001378
1379done:
1380 qdict_del(options, bdref_key);
1381 return ret;
1382}
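/*
 * Illustrative sketch, not part of the original file: opening the protocol
 * layer of an image through bdrv_open_image().  Every "file."-prefixed key
 * in 'options' is handed to the new BDS; the flag combination here is just
 * an example.  With allow_none=true and nothing to open, *file stays NULL
 * and 0 is returned.
 */
static int example_open_file_layer(BlockDriverState **file, QDict *options,
                                   const char *filename, Error **errp)
{
    *file = NULL; /* bdrv_open_image() requires *pbs == NULL */

    return bdrv_open_image(file, filename, options, "file",
                           BDRV_O_RDWR | BDRV_O_CACHE_WB,
                           true /* allow_none */, errp);
}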
1383
Chen Gang6b8aeca2014-06-23 23:28:23 +08001384int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001385{
1386 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001387 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001388 int64_t total_size;
Chunyan Liu83d05212014-06-05 17:20:51 +08001389 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001390 QDict *snapshot_options;
1391 BlockDriverState *bs_snapshot;
1392 Error *local_err;
1393 int ret;
1394
 1395 /* For snapshot=on, we create a temporary qcow2 overlay on top of the
 1396 image and open that overlay instead of writing to 'filename' directly */
1397
1398 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001399 total_size = bdrv_getlength(bs);
1400 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001401 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001402 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001403 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001404 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001405
1406 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001407 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001408 if (ret < 0) {
1409 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001410 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001411 }
1412
Max Reitzef810432014-12-02 18:32:42 +01001413 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001414 &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01001415 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
Max Reitzef810432014-12-02 18:32:42 +01001416 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001417 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001418 if (ret < 0) {
1419 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1420 "'%s': %s", tmp_filename,
1421 error_get_pretty(local_err));
1422 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001423 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001424 }
1425
1426 /* Prepare a new options QDict for the temporary file */
1427 snapshot_options = qdict_new();
1428 qdict_put(snapshot_options, "file.driver",
1429 qstring_from_str("file"));
1430 qdict_put(snapshot_options, "file.filename",
1431 qstring_from_str(tmp_filename));
1432
Markus Armbrustere4e99862014-10-07 13:59:03 +02001433 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001434
1435 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Max Reitzef810432014-12-02 18:32:42 +01001436 flags, &bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001437 if (ret < 0) {
1438 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001439 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001440 }
1441
1442 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001443
1444out:
1445 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001446 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001447}
1448
Max Reitzda557aa2013-12-20 19:28:11 +01001449/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001450 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001451 *
1452 * options is a QDict of options to pass to the block drivers, or NULL for an
1453 * empty set of options. The reference to the QDict belongs to the block layer
1454 * after the call (even on failure), so if the caller intends to reuse the
1455 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001456 *
1457 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1458 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001459 *
1460 * The reference parameter may be used to specify an existing block device which
1461 * should be opened. If specified, neither options nor a filename may be given,
1462 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001463 */
Max Reitzddf56362014-02-18 18:33:06 +01001464int bdrv_open(BlockDriverState **pbs, const char *filename,
1465 const char *reference, QDict *options, int flags,
1466 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001467{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001468 int ret;
Max Reitzf67503e2014-02-18 18:33:05 +01001469 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001470 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001471 Error *local_err = NULL;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001472 int snapshot_flags = 0;
bellard712e7872005-04-28 21:09:32 +00001473
Max Reitzf67503e2014-02-18 18:33:05 +01001474 assert(pbs);
1475
Max Reitzddf56362014-02-18 18:33:06 +01001476 if (reference) {
1477 bool options_non_empty = options ? qdict_size(options) : false;
1478 QDECREF(options);
1479
1480 if (*pbs) {
1481 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1482 "another block device");
1483 return -EINVAL;
1484 }
1485
1486 if (filename || options_non_empty) {
1487 error_setg(errp, "Cannot reference an existing block device with "
1488 "additional options or a new filename");
1489 return -EINVAL;
1490 }
1491
1492 bs = bdrv_lookup_bs(reference, reference, errp);
1493 if (!bs) {
1494 return -ENODEV;
1495 }
1496 bdrv_ref(bs);
1497 *pbs = bs;
1498 return 0;
1499 }
1500
Max Reitzf67503e2014-02-18 18:33:05 +01001501 if (*pbs) {
1502 bs = *pbs;
1503 } else {
Markus Armbrustere4e99862014-10-07 13:59:03 +02001504 bs = bdrv_new();
Max Reitzf67503e2014-02-18 18:33:05 +01001505 }
1506
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001507 /* NULL means an empty set of options */
1508 if (options == NULL) {
1509 options = qdict_new();
1510 }
1511
Kevin Wolf17b005f2014-05-27 10:50:29 +02001512 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
Kevin Wolf462f5bc2014-05-26 11:39:55 +02001513 if (local_err) {
1514 goto fail;
1515 }
1516
Kevin Wolf76c591b2014-06-04 14:19:44 +02001517 /* Find the right image format driver */
1518 drv = NULL;
1519 drvname = qdict_get_try_str(options, "driver");
1520 if (drvname) {
1521 drv = bdrv_find_format(drvname);
1522 qdict_del(options, "driver");
1523 if (!drv) {
1524 error_setg(errp, "Unknown driver: '%s'", drvname);
1525 ret = -EINVAL;
1526 goto fail;
1527 }
1528 }
1529
1530 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1531 if (drv && !drv->bdrv_file_open) {
1532 /* If the user explicitly wants a format driver here, we'll need to add
1533 * another layer for the protocol in bs->file */
1534 flags &= ~BDRV_O_PROTOCOL;
1535 }
1536
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001537 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001538 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001539
Kevin Wolff4788ad2014-06-03 16:44:19 +02001540 /* Open image file without format layer */
1541 if ((flags & BDRV_O_PROTOCOL) == 0) {
1542 if (flags & BDRV_O_RDWR) {
1543 flags |= BDRV_O_ALLOW_RDWR;
1544 }
1545 if (flags & BDRV_O_SNAPSHOT) {
1546 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1547 flags = bdrv_backing_flags(flags);
1548 }
1549
1550 assert(file == NULL);
1551 ret = bdrv_open_image(&file, filename, options, "file",
1552 bdrv_inherited_flags(flags),
1553 true, &local_err);
1554 if (ret < 0) {
Max Reitz5469a2a2014-02-18 18:33:10 +01001555 goto fail;
1556 }
1557 }
1558
Kevin Wolf76c591b2014-06-04 14:19:44 +02001559 /* Image format probing */
Kevin Wolf38f3ef52014-11-20 16:27:12 +01001560 bs->probed = !drv;
Kevin Wolf76c591b2014-06-04 14:19:44 +02001561 if (!drv && file) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001562 ret = find_image_format(file, filename, &drv, &local_err);
1563 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001564 goto fail;
Max Reitz2a05cbe2013-12-20 19:28:10 +01001565 }
Kevin Wolf76c591b2014-06-04 14:19:44 +02001566 } else if (!drv) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001567 error_setg(errp, "Must specify either driver or file");
1568 ret = -EINVAL;
Kevin Wolf8bfea152014-04-11 19:16:36 +02001569 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001570 }
1571
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001572 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001573 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001574 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001575 goto fail;
Christoph Hellwig69873072010-01-20 18:13:25 +01001576 }
1577
Max Reitz2a05cbe2013-12-20 19:28:10 +01001578 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001579 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001580 file = NULL;
1581 }
1582
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001583 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001584 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001585 QDict *backing_options;
1586
Benoît Canet5726d872013-09-25 13:30:01 +02001587 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001588 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001589 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001590 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001591 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001592 }
1593
Max Reitz91af7012014-07-18 20:24:56 +02001594 bdrv_refresh_filename(bs);
1595
Kevin Wolfb9988752014-04-03 12:09:34 +02001596 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1597 * temporary snapshot afterwards. */
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001598 if (snapshot_flags) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001599 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001600 if (local_err) {
Kevin Wolfb9988752014-04-03 12:09:34 +02001601 goto close_and_fail;
1602 }
1603 }
1604
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001605 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001606 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001607 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001608 if (flags & BDRV_O_PROTOCOL) {
1609 error_setg(errp, "Block protocol '%s' doesn't support the option "
1610 "'%s'", drv->format_name, entry->key);
1611 } else {
1612 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1613 "support the option '%s'", drv->format_name,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001614 bdrv_get_device_name(bs), entry->key);
Max Reitz5acd9d82014-02-18 18:33:11 +01001615 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001616
1617 ret = -EINVAL;
1618 goto close_and_fail;
1619 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001620
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001621 if (!bdrv_key_required(bs)) {
Markus Armbrustera7f53e22014-10-07 13:59:25 +02001622 if (bs->blk) {
1623 blk_dev_change_media_cb(bs->blk, true);
1624 }
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001625 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1626 && !runstate_check(RUN_STATE_INMIGRATE)
1627 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1628 error_setg(errp,
1629 "Guest must be stopped for opening of encrypted image");
1630 ret = -EBUSY;
1631 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001632 }
1633
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001634 QDECREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001635 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001636 return 0;
1637
Kevin Wolf8bfea152014-04-11 19:16:36 +02001638fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001639 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001640 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001641 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001642 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001643 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001644 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001645 if (!*pbs) {
1646 /* If *pbs is NULL, a new BDS has been created in this function and
1647 needs to be freed now. Otherwise, it does not need to be closed,
1648 since it has not really been opened yet. */
1649 bdrv_unref(bs);
1650 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001651 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001652 error_propagate(errp, local_err);
1653 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001654 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001655
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001656close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001657 /* See fail path, but now the BDS has to be always closed */
1658 if (*pbs) {
1659 bdrv_close(bs);
1660 } else {
1661 bdrv_unref(bs);
1662 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001663 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001664 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001665 error_propagate(errp, local_err);
1666 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001667 return ret;
1668}
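/*
 * Illustrative sketch, not part of the original file: a typical bdrv_open()
 * call that lets the block layer allocate the BDS and resolve the format
 * driver from the options.  "test.qcow2", the "qcow2" driver name and the
 * flag set are placeholder values.
 */
static BlockDriverState *example_open_image(Error **errp)
{
    BlockDriverState *bs = NULL; /* ask bdrv_open() to create the BDS */
    QDict *options = qdict_new();
    int ret;

    qdict_put(options, "driver", qstring_from_str("qcow2"));

    /* The options reference is consumed even on failure */
    ret = bdrv_open(&bs, "test.qcow2", NULL, options,
                    BDRV_O_RDWR | BDRV_O_CACHE_WB, NULL, errp);
    if (ret < 0) {
        return NULL; /* *pbs has been left NULL on this path */
    }
    return bs; /* released by the caller with bdrv_unref() */
}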
1669
Jeff Codye971aa12012-09-20 15:13:19 -04001670typedef struct BlockReopenQueueEntry {
1671 bool prepared;
1672 BDRVReopenState state;
1673 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1674} BlockReopenQueueEntry;
1675
1676/*
1677 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1678 * reopen of multiple devices.
1679 *
 1680 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLEQ_INIT
 1681 * already performed, or it may be NULL, in which case a new BlockReopenQueue
 1682 * will be created and initialized. This newly created BlockReopenQueue should be
1683 * passed back in for subsequent calls that are intended to be of the same
1684 * atomic 'set'.
1685 *
1686 * bs is the BlockDriverState to add to the reopen queue.
1687 *
1688 * flags contains the open flags for the associated bs
1689 *
1690 * returns a pointer to bs_queue, which is either the newly allocated
1691 * bs_queue, or the existing bs_queue being used.
1692 *
1693 */
1694BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1695 BlockDriverState *bs, int flags)
1696{
1697 assert(bs != NULL);
1698
1699 BlockReopenQueueEntry *bs_entry;
1700 if (bs_queue == NULL) {
1701 bs_queue = g_new0(BlockReopenQueue, 1);
1702 QSIMPLEQ_INIT(bs_queue);
1703 }
1704
Kevin Wolff1f25a22014-04-25 19:04:55 +02001705 /* bdrv_open() masks this flag out */
1706 flags &= ~BDRV_O_PROTOCOL;
1707
Jeff Codye971aa12012-09-20 15:13:19 -04001708 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001709 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001710 }
1711
1712 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1713 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1714
1715 bs_entry->state.bs = bs;
1716 bs_entry->state.flags = flags;
1717
1718 return bs_queue;
1719}
1720
1721/*
1722 * Reopen multiple BlockDriverStates atomically & transactionally.
1723 *
 1724 * The queue passed in (bs_queue) must have been built up previously
1725 * via bdrv_reopen_queue().
1726 *
1727 * Reopens all BDS specified in the queue, with the appropriate
1728 * flags. All devices are prepared for reopen, and failure of any
 1729 * device will cause all device changes to be abandoned, and intermediate
1730 * data cleaned up.
1731 *
1732 * If all devices prepare successfully, then the changes are committed
1733 * to all devices.
1734 *
1735 */
1736int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1737{
1738 int ret = -1;
1739 BlockReopenQueueEntry *bs_entry, *next;
1740 Error *local_err = NULL;
1741
1742 assert(bs_queue != NULL);
1743
1744 bdrv_drain_all();
1745
1746 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1747 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1748 error_propagate(errp, local_err);
1749 goto cleanup;
1750 }
1751 bs_entry->prepared = true;
1752 }
1753
1754 /* If we reach this point, we have success and just need to apply the
1755 * changes
1756 */
1757 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1758 bdrv_reopen_commit(&bs_entry->state);
1759 }
1760
1761 ret = 0;
1762
1763cleanup:
1764 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1765 if (ret && bs_entry->prepared) {
1766 bdrv_reopen_abort(&bs_entry->state);
1767 }
1768 g_free(bs_entry);
1769 }
1770 g_free(bs_queue);
1771 return ret;
1772}
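/*
 * Illustrative sketch, not part of the original file: making two devices
 * read-only in one transaction.  Either both BDSes are reopened with the
 * new flags or neither is; 'bs_a' and 'bs_b' are assumed to be open.
 */
static int example_reopen_two_read_only(BlockDriverState *bs_a,
                                        BlockDriverState *bs_b,
                                        Error **errp)
{
    BlockReopenQueue *queue;

    /* Build up the atomic set... */
    queue = bdrv_reopen_queue(NULL, bs_a, bs_a->open_flags & ~BDRV_O_RDWR);
    queue = bdrv_reopen_queue(queue, bs_b, bs_b->open_flags & ~BDRV_O_RDWR);

    /* ...then prepare and commit all entries; the queue is freed here. */
    return bdrv_reopen_multiple(queue, errp);
}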
1773
1774
1775/* Reopen a single BlockDriverState with the specified flags. */
1776int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1777{
1778 int ret = -1;
1779 Error *local_err = NULL;
1780 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1781
1782 ret = bdrv_reopen_multiple(queue, &local_err);
1783 if (local_err != NULL) {
1784 error_propagate(errp, local_err);
1785 }
1786 return ret;
1787}
1788
1789
1790/*
1791 * Prepares a BlockDriverState for reopen. All changes are staged in the
1792 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 1793 * the block driver's .bdrv_reopen_prepare() callback.
1794 *
1795 * bs is the BlockDriverState to reopen
1796 * flags are the new open flags
1797 * queue is the reopen queue
1798 *
1799 * Returns 0 on success, non-zero on error. On error errp will be set
1800 * as well.
1801 *
1802 * On failure, bdrv_reopen_abort() will be called to clean up any data.
 1803 * It is then the responsibility of the caller to call bdrv_reopen_abort() or
 1804 * bdrv_reopen_commit() for any other BDS that have been left in a prepare() state.
1805 *
1806 */
1807int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1808 Error **errp)
1809{
1810 int ret = -1;
1811 Error *local_err = NULL;
1812 BlockDriver *drv;
1813
1814 assert(reopen_state != NULL);
1815 assert(reopen_state->bs->drv != NULL);
1816 drv = reopen_state->bs->drv;
1817
1818 /* if we are to stay read-only, do not allow permission change
1819 * to r/w */
1820 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1821 reopen_state->flags & BDRV_O_RDWR) {
1822 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001823 bdrv_get_device_name(reopen_state->bs));
Jeff Codye971aa12012-09-20 15:13:19 -04001824 goto error;
1825 }
1826
1827
1828 ret = bdrv_flush(reopen_state->bs);
1829 if (ret) {
1830 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1831 strerror(-ret));
1832 goto error;
1833 }
1834
1835 if (drv->bdrv_reopen_prepare) {
1836 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1837 if (ret) {
1838 if (local_err != NULL) {
1839 error_propagate(errp, local_err);
1840 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001841 error_setg(errp, "failed while preparing to reopen image '%s'",
1842 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001843 }
1844 goto error;
1845 }
1846 } else {
1847 /* It is currently mandatory to have a bdrv_reopen_prepare()
1848 * handler for each supported drv. */
1849 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001850 drv->format_name, bdrv_get_device_name(reopen_state->bs),
Jeff Codye971aa12012-09-20 15:13:19 -04001851 "reopening of file");
1852 ret = -1;
1853 goto error;
1854 }
1855
1856 ret = 0;
1857
1858error:
1859 return ret;
1860}
1861
1862/*
1863 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1864 * makes them final by swapping the staging BlockDriverState contents into
1865 * the active BlockDriverState contents.
1866 */
1867void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1868{
1869 BlockDriver *drv;
1870
1871 assert(reopen_state != NULL);
1872 drv = reopen_state->bs->drv;
1873 assert(drv != NULL);
1874
1875 /* If there are any driver level actions to take */
1876 if (drv->bdrv_reopen_commit) {
1877 drv->bdrv_reopen_commit(reopen_state);
1878 }
1879
1880 /* set BDS specific flags now */
1881 reopen_state->bs->open_flags = reopen_state->flags;
1882 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1883 BDRV_O_CACHE_WB);
1884 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001885
Kevin Wolf3baca892014-07-16 17:48:16 +02001886 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001887}
1888
1889/*
1890 * Abort the reopen, and delete and free the staged changes in
1891 * reopen_state
1892 */
1893void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1894{
1895 BlockDriver *drv;
1896
1897 assert(reopen_state != NULL);
1898 drv = reopen_state->bs->drv;
1899 assert(drv != NULL);
1900
1901 if (drv->bdrv_reopen_abort) {
1902 drv->bdrv_reopen_abort(reopen_state);
1903 }
1904}
1905
1906
bellardfc01f7e2003-06-30 10:03:06 +00001907void bdrv_close(BlockDriverState *bs)
1908{
Max Reitz33384422014-06-20 21:57:33 +02001909 BdrvAioNotifier *ban, *ban_next;
1910
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001911 if (bs->job) {
1912 block_job_cancel_sync(bs->job);
1913 }
Stefan Hajnoczi58fda172013-07-02 15:36:25 +02001914 bdrv_drain_all(); /* complete I/O */
1915 bdrv_flush(bs);
1916 bdrv_drain_all(); /* in case flush left pending I/O */
Paolo Bonzinid7d512f2012-08-23 11:20:36 +02001917 notifier_list_notify(&bs->close_notifiers, bs);
Kevin Wolf7094f122012-04-11 11:06:37 +02001918
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001919 if (bs->drv) {
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001920 if (bs->backing_hd) {
Fam Zheng826b6ca2014-05-23 21:29:47 +08001921 BlockDriverState *backing_hd = bs->backing_hd;
1922 bdrv_set_backing_hd(bs, NULL);
1923 bdrv_unref(backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001924 }
bellardea2384d2004-08-01 21:59:26 +00001925 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -05001926 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +00001927 bs->opaque = NULL;
1928 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001929 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +02001930 bs->backing_file[0] = '\0';
1931 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +02001932 bs->total_sectors = 0;
1933 bs->encrypted = 0;
1934 bs->valid_key = 0;
1935 bs->sg = 0;
Asias He0d51b4d2013-08-22 15:24:14 +08001936 bs->zero_beyond_eof = false;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001937 QDECREF(bs->options);
1938 bs->options = NULL;
Max Reitz91af7012014-07-18 20:24:56 +02001939 QDECREF(bs->full_open_options);
1940 bs->full_open_options = NULL;
bellardb3380822004-03-14 21:38:54 +00001941
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001942 if (bs->file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001943 bdrv_unref(bs->file);
Paolo Bonzini0ac93772012-05-08 16:51:44 +02001944 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001945 }
bellardb3380822004-03-14 21:38:54 +00001946 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001947
Markus Armbrustera7f53e22014-10-07 13:59:25 +02001948 if (bs->blk) {
1949 blk_dev_change_media_cb(bs->blk, false);
1950 }
Pavel Hrdina9ca11152012-08-09 12:44:48 +02001951
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001952 /*throttling disk I/O limits*/
1953 if (bs->io_limits_enabled) {
1954 bdrv_io_limits_disable(bs);
1955 }
Max Reitz33384422014-06-20 21:57:33 +02001956
1957 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1958 g_free(ban);
1959 }
1960 QLIST_INIT(&bs->aio_notifiers);
bellardb3380822004-03-14 21:38:54 +00001961}
1962
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001963void bdrv_close_all(void)
1964{
1965 BlockDriverState *bs;
1966
Benoît Canetdc364f42014-01-23 21:31:32 +01001967 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001968 AioContext *aio_context = bdrv_get_aio_context(bs);
1969
1970 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001971 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001972 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001973 }
1974}
1975
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001976/* Check if any requests are in-flight (including throttled requests) */
1977static bool bdrv_requests_pending(BlockDriverState *bs)
1978{
1979 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1980 return true;
1981 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001982 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1983 return true;
1984 }
1985 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001986 return true;
1987 }
1988 if (bs->file && bdrv_requests_pending(bs->file)) {
1989 return true;
1990 }
1991 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1992 return true;
1993 }
1994 return false;
1995}
1996
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01001997static bool bdrv_drain_one(BlockDriverState *bs)
1998{
1999 bool bs_busy;
2000
2001 bdrv_flush_io_queue(bs);
2002 bdrv_start_throttled_reqs(bs);
2003 bs_busy = bdrv_requests_pending(bs);
2004 bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
2005 return bs_busy;
2006}
2007
2008/*
2009 * Wait for pending requests to complete on a single BlockDriverState subtree
2010 *
2011 * See the warning in bdrv_drain_all(). This function can only be called if
2012 * you are sure nothing can generate I/O because you have op blockers
2013 * installed.
2014 *
2015 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
2016 * AioContext.
2017 */
2018void bdrv_drain(BlockDriverState *bs)
2019{
2020 while (bdrv_drain_one(bs)) {
2021 /* Keep iterating */
2022 }
2023}
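/*
 * Illustrative sketch, not part of the original file: quiescing a single
 * BDS before inspecting or modifying its state.  It assumes the caller has
 * already blocked new I/O sources (e.g. with op blockers) as required by
 * the comment above.
 */
static void example_quiesce_one(BlockDriverState *bs)
{
    AioContext *ctx = bdrv_get_aio_context(bs);

    aio_context_acquire(ctx);
    bdrv_drain(bs); /* wait for all in-flight requests on this subtree */
    /* ... safely inspect or modify bs here ... */
    aio_context_release(ctx);
}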
2024
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002025/*
2026 * Wait for pending requests to complete across all BlockDriverStates
2027 *
2028 * This function does not flush data to disk, use bdrv_flush_all() for that
2029 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02002030 *
2031 * Note that completion of an asynchronous I/O operation can trigger any
2032 * number of other I/O operations on other devices---for example a coroutine
2033 * can be arbitrarily complex and a constant flow of I/O can come until the
 2034 * coroutine is complete. Because of this, it is only safe to drain a
 2035 * single device's I/O queue (see bdrv_drain()) after quiescing its I/O sources.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002036 */
2037void bdrv_drain_all(void)
2038{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02002039 /* Always run first iteration so any pending completion BHs run */
2040 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002041 BlockDriverState *bs;
2042
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02002043 while (busy) {
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002044 busy = false;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002045
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002046 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2047 AioContext *aio_context = bdrv_get_aio_context(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002048
2049 aio_context_acquire(aio_context);
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01002050 busy |= bdrv_drain_one(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002051 aio_context_release(aio_context);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002052 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002053 }
2054}
2055
Benoît Canetdc364f42014-01-23 21:31:32 +01002056/* Make a BlockDriverState anonymous by removing it from the bdrv_states and
 2057 * graph_bdrv_states lists.
Ryan Harperd22b2f42011-03-29 20:51:47 -05002058 * Also, clear the node_name to prevent double removal. */
2059void bdrv_make_anon(BlockDriverState *bs)
2060{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002061 /*
2062 * Take care to remove bs from bdrv_states only when it's actually
2063 * in it. Note that bs->device_list.tqe_prev is initially null,
2064 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
2065 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2066 * resetting it to null on remove.
2067 */
2068 if (bs->device_list.tqe_prev) {
Benoît Canetdc364f42014-01-23 21:31:32 +01002069 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002070 bs->device_list.tqe_prev = NULL;
Ryan Harperd22b2f42011-03-29 20:51:47 -05002071 }
Benoît Canetdc364f42014-01-23 21:31:32 +01002072 if (bs->node_name[0] != '\0') {
2073 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2074 }
2075 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05002076}
2077
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02002078static void bdrv_rebind(BlockDriverState *bs)
2079{
2080 if (bs->drv && bs->drv->bdrv_rebind) {
2081 bs->drv->bdrv_rebind(bs);
2082 }
2083}
2084
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002085static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2086 BlockDriverState *bs_src)
2087{
2088 /* move some fields that need to stay attached to the device */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002089
2090 /* dev info */
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01002091 bs_dest->guest_block_size = bs_src->guest_block_size;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002092 bs_dest->copy_on_read = bs_src->copy_on_read;
2093
2094 bs_dest->enable_write_cache = bs_src->enable_write_cache;
2095
Benoît Canetcc0681c2013-09-02 14:14:39 +02002096 /* i/o throttled req */
2097 memcpy(&bs_dest->throttle_state,
2098 &bs_src->throttle_state,
2099 sizeof(ThrottleState));
2100 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
2101 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002102 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
2103
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002104 /* r/w error */
2105 bs_dest->on_read_error = bs_src->on_read_error;
2106 bs_dest->on_write_error = bs_src->on_write_error;
2107
2108 /* i/o status */
2109 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
2110 bs_dest->iostatus = bs_src->iostatus;
2111
2112 /* dirty bitmap */
Fam Zhenge4654d22013-11-13 18:29:43 +08002113 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002114
Fam Zheng9fcb0252013-08-23 09:14:46 +08002115 /* reference count */
2116 bs_dest->refcnt = bs_src->refcnt;
2117
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002118 /* job */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002119 bs_dest->job = bs_src->job;
2120
2121 /* keep the same entry in bdrv_states */
Benoît Canetdc364f42014-01-23 21:31:32 +01002122 bs_dest->device_list = bs_src->device_list;
Markus Armbruster7e7d56d2014-10-07 13:59:05 +02002123 bs_dest->blk = bs_src->blk;
2124
Fam Zhengfbe40ff2014-05-23 21:29:42 +08002125 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2126 sizeof(bs_dest->op_blockers));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002127}
2128
2129/*
2130 * Swap bs contents for two image chains while they are live,
2131 * while keeping required fields on the BlockDriverState that is
2132 * actually attached to a device.
2133 *
2134 * This will modify the BlockDriverState fields, and swap contents
2135 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2136 *
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002137 * bs_new must not be attached to a BlockBackend.
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002138 *
2139 * This function does not create any image files.
2140 */
2141void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2142{
2143 BlockDriverState tmp;
2144
Benoît Canet90ce8a02014-03-05 23:48:29 +01002145 /* The code needs to swap the node_name but simply swapping node_list won't
2146 * work so first remove the nodes from the graph list, do the swap then
2147 * insert them back if needed.
2148 */
2149 if (bs_new->node_name[0] != '\0') {
2150 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2151 }
2152 if (bs_old->node_name[0] != '\0') {
2153 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2154 }
2155
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002156 /* bs_new must be unattached and shouldn't have anything fancy enabled */
Markus Armbruster7e7d56d2014-10-07 13:59:05 +02002157 assert(!bs_new->blk);
Fam Zhenge4654d22013-11-13 18:29:43 +08002158 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002159 assert(bs_new->job == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002160 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002161 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002162
2163 tmp = *bs_new;
2164 *bs_new = *bs_old;
2165 *bs_old = tmp;
2166
2167 /* there are some fields that should not be swapped, move them back */
2168 bdrv_move_feature_fields(&tmp, bs_old);
2169 bdrv_move_feature_fields(bs_old, bs_new);
2170 bdrv_move_feature_fields(bs_new, &tmp);
2171
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002172 /* bs_new must remain unattached */
Markus Armbruster7e7d56d2014-10-07 13:59:05 +02002173 assert(!bs_new->blk);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002174
2175 /* Check a few fields that should remain attached to the device */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002176 assert(bs_new->job == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002177 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002178 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002179
Benoît Canet90ce8a02014-03-05 23:48:29 +01002180 /* insert the nodes back into the graph node list if needed */
2181 if (bs_new->node_name[0] != '\0') {
2182 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2183 }
2184 if (bs_old->node_name[0] != '\0') {
2185 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2186 }
2187
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002188 bdrv_rebind(bs_new);
2189 bdrv_rebind(bs_old);
2190}
2191
Jeff Cody8802d1f2012-02-28 15:54:06 -05002192/*
2193 * Add new bs contents at the top of an image chain while the chain is
2194 * live, while keeping required fields on the top layer.
2195 *
2196 * This will modify the BlockDriverState fields, and swap contents
2197 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2198 *
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002199 * bs_new must not be attached to a BlockBackend.
Jeff Codyf6801b82012-03-27 16:30:19 -04002200 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05002201 * This function does not create any image files.
2202 */
2203void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2204{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002205 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002206
 2207 /* After bdrv_swap(), bs_new holds the old top's contents; make it the
 2208 * backing file of the new top. */
Fam Zheng8d24cce2014-05-23 21:29:45 +08002209 bdrv_set_backing_hd(bs_top, bs_new);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002210}
2211
Fam Zheng4f6fd342013-08-23 09:14:47 +08002212static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00002213{
Paolo Bonzini3e914652012-03-30 13:17:11 +02002214 assert(!bs->job);
Fam Zheng3718d8a2014-05-23 21:29:43 +08002215 assert(bdrv_op_blocker_is_empty(bs));
Fam Zheng4f6fd342013-08-23 09:14:47 +08002216 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08002217 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02002218
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02002219 bdrv_close(bs);
2220
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002221 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05002222 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00002223
Anthony Liguori7267c092011-08-20 22:09:37 -05002224 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00002225}
2226
aliguorie97fc192009-04-21 23:11:50 +00002227/*
2228 * Run consistency checks on an image
2229 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002230 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002231 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002232 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002233 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002234int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002235{
Max Reitz908bcd52014-08-07 22:47:55 +02002236 if (bs->drv == NULL) {
2237 return -ENOMEDIUM;
2238 }
aliguorie97fc192009-04-21 23:11:50 +00002239 if (bs->drv->bdrv_check == NULL) {
2240 return -ENOTSUP;
2241 }
2242
Kevin Wolfe076f332010-06-29 11:43:13 +02002243 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002244 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002245}
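/*
 * Illustrative sketch, not part of the original file: running an image
 * check and repairing leaked clusters.  The BdrvCheckResult field names
 * and the BDRV_FIX_LEAKS flag are assumed to match block.h in this tree.
 */
static int example_check_and_repair(BlockDriverState *bs)
{
    BdrvCheckResult result;
    int ret = bdrv_check(bs, &result, BDRV_FIX_LEAKS);

    if (ret < 0) {
        return ret; /* the check itself could not be completed */
    }
    fprintf(stderr, "%d corruptions, %d leaks (%d leaks repaired)\n",
            result.corruptions, result.leaks, result.leaks_fixed);
    return result.corruptions ? -EIO : 0;
}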
2246
Kevin Wolf8a426612010-07-16 17:17:01 +02002247#define COMMIT_BUF_SECTORS 2048
2248
bellard33e39632003-07-06 17:15:21 +00002249/* commit COW file into the raw image */
2250int bdrv_commit(BlockDriverState *bs)
2251{
bellard19cb3732006-08-19 11:45:59 +00002252 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002253 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002254 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002255 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002256 uint8_t *buf = NULL;
bellard33e39632003-07-06 17:15:21 +00002257
bellard19cb3732006-08-19 11:45:59 +00002258 if (!drv)
2259 return -ENOMEDIUM;
Liu Yuan6bb45152014-09-01 13:35:21 +08002260
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002261 if (!bs->backing_hd) {
2262 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002263 }
2264
Fam Zhengbb000212014-09-11 13:14:00 +08002265 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2266 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002267 return -EBUSY;
2268 }
2269
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002270 ro = bs->backing_hd->read_only;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002271 open_flags = bs->backing_hd->open_flags;
2272
2273 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002274 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2275 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002276 }
bellard33e39632003-07-06 17:15:21 +00002277 }
bellardea2384d2004-08-01 21:59:26 +00002278
Jeff Cody72706ea2014-01-24 09:02:35 -05002279 length = bdrv_getlength(bs);
2280 if (length < 0) {
2281 ret = length;
2282 goto ro_cleanup;
2283 }
2284
2285 backing_length = bdrv_getlength(bs->backing_hd);
2286 if (backing_length < 0) {
2287 ret = backing_length;
2288 goto ro_cleanup;
2289 }
2290
2291 /* If our top snapshot is larger than the backing file image,
2292 * grow the backing file image if possible. If not possible,
2293 * we must return an error */
2294 if (length > backing_length) {
2295 ret = bdrv_truncate(bs->backing_hd, length);
2296 if (ret < 0) {
2297 goto ro_cleanup;
2298 }
2299 }
2300
2301 total_sectors = length >> BDRV_SECTOR_BITS;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002302
2303 /* qemu_try_blockalign() for bs will choose an alignment that works for
2304 * bs->backing_hd as well, so no need to compare the alignment manually. */
2305 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2306 if (buf == NULL) {
2307 ret = -ENOMEM;
2308 goto ro_cleanup;
2309 }
bellardea2384d2004-08-01 21:59:26 +00002310
Kevin Wolf8a426612010-07-16 17:17:01 +02002311 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002312 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2313 if (ret < 0) {
2314 goto ro_cleanup;
2315 }
2316 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002317 ret = bdrv_read(bs, sector, buf, n);
2318 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002319 goto ro_cleanup;
2320 }
2321
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002322 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2323 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002324 goto ro_cleanup;
2325 }
bellardea2384d2004-08-01 21:59:26 +00002326 }
2327 }
bellard95389c82005-12-18 18:28:15 +00002328
Christoph Hellwig1d449522010-01-17 12:32:30 +01002329 if (drv->bdrv_make_empty) {
2330 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002331 if (ret < 0) {
2332 goto ro_cleanup;
2333 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002334 bdrv_flush(bs);
2335 }
bellard95389c82005-12-18 18:28:15 +00002336
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002337 /*
2338 * Make sure all data we wrote to the backing device is actually
2339 * stable on disk.
2340 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002341 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002342 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002343 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002344
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002345 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002346ro_cleanup:
Kevin Wolf857d4f42014-05-20 13:16:51 +02002347 qemu_vfree(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002348
2349 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002350 /* ignoring error return here */
2351 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002352 }
2353
Christoph Hellwig1d449522010-01-17 12:32:30 +01002354 return ret;
bellard33e39632003-07-06 17:15:21 +00002355}
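/*
 * Illustrative sketch, not part of the original file: committing one
 * device and mapping the common error returns of bdrv_commit() seen above
 * to messages.  The message text is an example only.
 */
static void example_commit_and_report(BlockDriverState *bs)
{
    int ret = bdrv_commit(bs);

    if (ret == 0) {
        return;
    } else if (ret == -ENOTSUP) {
        fprintf(stderr, "image has no backing file to commit into\n");
    } else if (ret == -EBUSY) {
        fprintf(stderr, "commit blocked, e.g. by a job on the chain\n");
    } else {
        fprintf(stderr, "commit failed: %s\n", strerror(-ret));
    }
}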
2356
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002357int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002358{
2359 BlockDriverState *bs;
2360
Benoît Canetdc364f42014-01-23 21:31:32 +01002361 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002362 AioContext *aio_context = bdrv_get_aio_context(bs);
2363
2364 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002365 if (bs->drv && bs->backing_hd) {
2366 int ret = bdrv_commit(bs);
2367 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002368 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002369 return ret;
2370 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002371 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002372 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002373 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002374 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002375}
2376
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002377/**
2378 * Remove an active request from the tracked requests list
2379 *
2380 * This function should be called when a tracked request is completing.
2381 */
2382static void tracked_request_end(BdrvTrackedRequest *req)
2383{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002384 if (req->serialising) {
2385 req->bs->serialising_in_flight--;
2386 }
2387
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002388 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002389 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002390}
2391
2392/**
2393 * Add an active request to the tracked requests list
2394 */
2395static void tracked_request_begin(BdrvTrackedRequest *req,
2396 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002397 int64_t offset,
2398 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002399{
2400 *req = (BdrvTrackedRequest){
2401 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002402 .offset = offset,
2403 .bytes = bytes,
2404 .is_write = is_write,
2405 .co = qemu_coroutine_self(),
2406 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002407 .overlap_offset = offset,
2408 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002409 };
2410
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002411 qemu_co_queue_init(&req->wait_queue);
2412
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002413 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2414}
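/*
 * Illustrative sketch, not part of the original file: the bracketing
 * pattern used by the read/write paths in this file.  The request is
 * registered before the driver call and removed afterwards so overlap
 * checks and serialising waits can see it; this runs in coroutine context.
 */
static void coroutine_fn example_tracked_read(BlockDriverState *bs,
                                              int64_t offset,
                                              unsigned int bytes)
{
    BdrvTrackedRequest req;

    tracked_request_begin(&req, bs, offset, bytes, false);
    /* ... issue the actual read to the driver here ... */
    tracked_request_end(&req);
}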
2415
Kevin Wolfe96126f2014-02-08 10:42:18 +01002416static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002417{
Kevin Wolf73271452013-12-04 17:08:50 +01002418 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002419 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2420 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002421
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002422 if (!req->serialising) {
2423 req->bs->serialising_in_flight++;
2424 req->serialising = true;
2425 }
Kevin Wolf73271452013-12-04 17:08:50 +01002426
2427 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2428 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002429}
2430
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002431/**
2432 * Round a region to cluster boundaries
2433 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002434void bdrv_round_to_clusters(BlockDriverState *bs,
2435 int64_t sector_num, int nb_sectors,
2436 int64_t *cluster_sector_num,
2437 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002438{
2439 BlockDriverInfo bdi;
2440
2441 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2442 *cluster_sector_num = sector_num;
2443 *cluster_nb_sectors = nb_sectors;
2444 } else {
2445 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2446 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2447 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2448 nb_sectors, c);
2449 }
2450}
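/*
 * Illustrative sketch, not part of the original file: for an image with
 * 64k clusters (128 sectors), a one-sector request at sector 130 is
 * widened to the whole cluster that contains it.
 */
static void example_round_request(BlockDriverState *bs)
{
    int64_t cluster_sector_num;
    int cluster_nb_sectors;

    bdrv_round_to_clusters(bs, 130, 1,
                           &cluster_sector_num, &cluster_nb_sectors);
    /* With a 64k cluster size this yields sector 128 and 128 sectors. */
}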
2451
Kevin Wolf73271452013-12-04 17:08:50 +01002452static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002453{
2454 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002455 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002456
Kevin Wolf73271452013-12-04 17:08:50 +01002457 ret = bdrv_get_info(bs, &bdi);
2458 if (ret < 0 || bdi.cluster_size == 0) {
2459 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002460 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002461 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002462 }
2463}
2464
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002465static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002466 int64_t offset, unsigned int bytes)
2467{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002468 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002469 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002470 return false;
2471 }
2472 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002473 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002474 return false;
2475 }
2476 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002477}
2478
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002479static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002480{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002481 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002482 BdrvTrackedRequest *req;
2483 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002484 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002485
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002486 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002487 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002488 }
2489
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002490 do {
2491 retry = false;
2492 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002493 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002494 continue;
2495 }
Kevin Wolf73271452013-12-04 17:08:50 +01002496 if (tracked_request_overlaps(req, self->overlap_offset,
2497 self->overlap_bytes))
2498 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002499 /* Hitting this means there was a reentrant request, for
2500 * example, a block driver issuing nested requests. This must
2501 * never happen since it means deadlock.
2502 */
2503 assert(qemu_coroutine_self() != req->co);
2504
Kevin Wolf64604402013-12-13 13:04:35 +01002505 /* If the request is already (indirectly) waiting for us, or
2506 * will wait for us as soon as it wakes up, then just go on
2507 * (instead of producing a deadlock in the former case). */
2508 if (!req->waiting_for) {
2509 self->waiting_for = req;
2510 qemu_co_queue_wait(&req->wait_queue);
2511 self->waiting_for = NULL;
2512 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002513 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002514 break;
2515 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002516 }
2517 }
2518 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002519
2520 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002521}
2522
Kevin Wolf756e6732010-01-12 12:55:17 +01002523/*
2524 * Return values:
2525 * 0 - success
2526 * -EINVAL - backing format specified, but no file
2527 * -ENOSPC - can't update the backing file because no space is left in the
2528 * image file header
2529 * -ENOTSUP - format driver doesn't support changing the backing file
2530 */
2531int bdrv_change_backing_file(BlockDriverState *bs,
2532 const char *backing_file, const char *backing_fmt)
2533{
2534 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002535 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002536
Paolo Bonzini5f377792012-04-12 14:01:01 +02002537 /* Backing file format doesn't make sense without a backing file */
2538 if (backing_fmt && !backing_file) {
2539 return -EINVAL;
2540 }
2541
Kevin Wolf756e6732010-01-12 12:55:17 +01002542 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002543 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002544 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002545 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002546 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002547
2548 if (ret == 0) {
2549 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2550 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2551 }
2552 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002553}
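/*
 * Illustrative sketch, not part of the original file: rewriting the
 * backing-file reference stored in an image header after a rebase-like
 * operation.  "base.qcow2" and "qcow2" are placeholder values.
 */
static int example_update_backing_link(BlockDriverState *bs, Error **errp)
{
    int ret = bdrv_change_backing_file(bs, "base.qcow2", "qcow2");

    if (ret == -ENOTSUP) {
        error_setg(errp, "Image format does not support backing file update");
    } else if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not update the backing file");
    }
    return ret;
}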
2554
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002555/*
2556 * Finds the image layer in the chain that has 'bs' as its backing file.
2557 *
2558 * active is the current topmost image.
2559 *
2560 * Returns NULL if bs is not found in active's image chain,
2561 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002562 *
2563 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002564 */
2565BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2566 BlockDriverState *bs)
2567{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002568 while (active && bs != active->backing_hd) {
2569 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002570 }
2571
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002572 return active;
2573}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002574
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002575/* Given a BDS, searches for the base layer. */
2576BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2577{
2578 return bdrv_find_overlay(bs, NULL);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002579}
2580
2581typedef struct BlkIntermediateStates {
2582 BlockDriverState *bs;
2583 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2584} BlkIntermediateStates;
2585
2586
2587/*
2588 * Drops images above 'base' up to and including 'top', and sets the image
2589 * above 'top' to have base as its backing file.
2590 *
2591 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2592 * information in 'bs' can be properly updated.
2593 *
2594 * E.g., this will convert the following chain:
2595 * bottom <- base <- intermediate <- top <- active
2596 *
2597 * to
2598 *
2599 * bottom <- base <- active
2600 *
2601 * It is allowed for bottom==base, in which case it converts:
2602 *
2603 * base <- intermediate <- top <- active
2604 *
2605 * to
2606 *
2607 * base <- active
2608 *
Jeff Cody54e26902014-06-25 15:40:10 -04002609 * If backing_file_str is non-NULL, it will be used when modifying top's
2610 * overlay image metadata.
2611 *
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002612 * Error conditions:
2613 * if active == top, that is considered an error
2614 *
2615 */
2616int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
Jeff Cody54e26902014-06-25 15:40:10 -04002617 BlockDriverState *base, const char *backing_file_str)
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002618{
2619 BlockDriverState *intermediate;
2620 BlockDriverState *base_bs = NULL;
2621 BlockDriverState *new_top_bs = NULL;
2622 BlkIntermediateStates *intermediate_state, *next;
2623 int ret = -EIO;
2624
2625 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2626 QSIMPLEQ_INIT(&states_to_delete);
2627
2628 if (!top->drv || !base->drv) {
2629 goto exit;
2630 }
2631
2632 new_top_bs = bdrv_find_overlay(active, top);
2633
2634 if (new_top_bs == NULL) {
2635 /* we could not find the image above 'top', this is an error */
2636 goto exit;
2637 }
2638
2639 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2640 * to do, no intermediate images */
2641 if (new_top_bs->backing_hd == base) {
2642 ret = 0;
2643 goto exit;
2644 }
2645
2646 intermediate = top;
2647
2648 /* now we will go down through the list, and add each BDS we find
2649 * into our deletion queue, until we hit the 'base'
2650 */
2651 while (intermediate) {
Markus Armbruster5839e532014-08-19 10:31:08 +02002652 intermediate_state = g_new0(BlkIntermediateStates, 1);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002653 intermediate_state->bs = intermediate;
2654 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2655
2656 if (intermediate->backing_hd == base) {
2657 base_bs = intermediate->backing_hd;
2658 break;
2659 }
2660 intermediate = intermediate->backing_hd;
2661 }
2662 if (base_bs == NULL) {
2663 /* something went wrong, we did not end at the base. safely
2664 * unravel everything, and exit with error */
2665 goto exit;
2666 }
2667
2668 /* success - we can delete the intermediate states, and link top->base */
Jeff Cody54e26902014-06-25 15:40:10 -04002669 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2670 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002671 base_bs->drv ? base_bs->drv->format_name : "");
2672 if (ret) {
2673 goto exit;
2674 }
Fam Zheng920beae2014-05-23 21:29:46 +08002675 bdrv_set_backing_hd(new_top_bs, base_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002676
2677 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2678 /* so that bdrv_close() does not recursively close the chain */
Fam Zheng920beae2014-05-23 21:29:46 +08002679 bdrv_set_backing_hd(intermediate_state->bs, NULL);
Fam Zheng4f6fd342013-08-23 09:14:47 +08002680 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002681 }
2682 ret = 0;
2683
2684exit:
2685 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2686 g_free(intermediate_state);
2687 }
2688 return ret;
2689}
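/* Usage sketch (illustration only): drop every image between 'base' and
 * 'top' from the chain below 'active'. Passing NULL for backing_file_str
 * makes the function fall back to base_bs->filename, as shown above.
 *
 *     if (bdrv_drop_intermediate(active, top, base, NULL) < 0) {
 *         error_report("failed to drop intermediate images");
 *     }
 */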
2690
2691
aliguori71d07702009-03-03 17:37:16 +00002692static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2693 size_t size)
2694{
Peter Lieven75af1f32015-02-06 11:54:11 +01002695 if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002696 return -EIO;
2697 }
2698
Max Reitzc0191e72015-02-05 13:58:24 -05002699 if (!bdrv_is_inserted(bs)) {
aliguori71d07702009-03-03 17:37:16 +00002700 return -ENOMEDIUM;
Max Reitzc0191e72015-02-05 13:58:24 -05002701 }
aliguori71d07702009-03-03 17:37:16 +00002702
Max Reitzc0191e72015-02-05 13:58:24 -05002703 if (offset < 0) {
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002704 return -EIO;
Max Reitzc0191e72015-02-05 13:58:24 -05002705 }
aliguori71d07702009-03-03 17:37:16 +00002706
2707 return 0;
2708}
2709
2710static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2711 int nb_sectors)
2712{
Peter Lieven75af1f32015-02-06 11:54:11 +01002713 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002714 return -EIO;
2715 }
2716
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002717 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2718 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002719}
2720
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002721typedef struct RwCo {
2722 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002723 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002724 QEMUIOVector *qiov;
2725 bool is_write;
2726 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002727 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002728} RwCo;
2729
2730static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2731{
2732 RwCo *rwco = opaque;
2733
2734 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002735 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2736 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002737 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002738 } else {
2739 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2740 rwco->qiov->size, rwco->qiov,
2741 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002742 }
2743}
2744
2745/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002746 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002747 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002748static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2749 QEMUIOVector *qiov, bool is_write,
2750 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002751{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002752 Coroutine *co;
2753 RwCo rwco = {
2754 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002755 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002756 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002757 .is_write = is_write,
2758 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002759 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002760 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002761
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002762 /**
2763 * In sync call context, when the vcpu is blocked, this throttling timer
2764 * will not fire; so the I/O throttling function has to be disabled here
2765 * if it has been enabled.
2766 */
2767 if (bs->io_limits_enabled) {
2768 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2769 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2770 bdrv_io_limits_disable(bs);
2771 }
2772
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002773 if (qemu_in_coroutine()) {
2774 /* Fast-path if already in coroutine context */
2775 bdrv_rw_co_entry(&rwco);
2776 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002777 AioContext *aio_context = bdrv_get_aio_context(bs);
2778
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002779 co = qemu_coroutine_create(bdrv_rw_co_entry);
2780 qemu_coroutine_enter(co, &rwco);
2781 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002782 aio_poll(aio_context, true);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002783 }
2784 }
2785 return rwco.ret;
2786}
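/* The pattern above recurs throughout this file: when already in coroutine
 * context the entry function runs inline; otherwise a coroutine is created
 * and the caller spins in aio_poll() until the result changes from NOT_DONE.
 * A minimal sketch of the same idea, with hypothetical names:
 *
 *     Coroutine *co = qemu_coroutine_create(my_co_entry);
 *     qemu_coroutine_enter(co, &state);
 *     while (state.ret == NOT_DONE) {
 *         aio_poll(bdrv_get_aio_context(bs), true);
 *     }
 */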
2787
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002788/*
2789 * Process a synchronous request using coroutines
2790 */
2791static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002792 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002793{
2794 QEMUIOVector qiov;
2795 struct iovec iov = {
2796 .iov_base = (void *)buf,
2797 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2798 };
2799
Peter Lieven75af1f32015-02-06 11:54:11 +01002800 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolfda15ee52014-04-14 15:39:36 +02002801 return -EINVAL;
2802 }
2803
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002804 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002805 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2806 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002807}
2808
bellard19cb3732006-08-19 11:45:59 +00002809/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002810int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002811 uint8_t *buf, int nb_sectors)
2812{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002813 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002814}
2815
Markus Armbruster07d27a42012-06-29 17:34:29 +02002816/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2817int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2818 uint8_t *buf, int nb_sectors)
2819{
2820 bool enabled;
2821 int ret;
2822
2823 enabled = bs->io_limits_enabled;
2824 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002825 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002826 bs->io_limits_enabled = enabled;
2827 return ret;
2828}
2829
ths5fafdf22007-09-16 21:08:06 +00002830/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002831 -EIO generic I/O error (may happen for all errors)
2832 -ENOMEDIUM No media inserted.
2833 -EINVAL Invalid sector number or nb_sectors
2834 -EACCES Trying to write a read-only device
2835*/
ths5fafdf22007-09-16 21:08:06 +00002836int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002837 const uint8_t *buf, int nb_sectors)
2838{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002839 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002840}
2841
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002842int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2843 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002844{
2845 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002846 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002847}
2848
Peter Lievend75cbb52013-10-24 12:07:03 +02002849/*
2850 * Completely zero out a block device with the help of bdrv_write_zeroes.
2851 * The operation is sped up by checking the block status and only writing
2852 * zeroes to the device if they currently do not return zeroes. Optional
2853 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2854 *
2855 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2856 */
2857int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2858{
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002859 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
Peter Lievend75cbb52013-10-24 12:07:03 +02002860 int n;
2861
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002862 target_sectors = bdrv_nb_sectors(bs);
2863 if (target_sectors < 0) {
2864 return target_sectors;
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002865 }
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002866
Peter Lievend75cbb52013-10-24 12:07:03 +02002867 for (;;) {
Peter Lieven75af1f32015-02-06 11:54:11 +01002868 nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
Peter Lievend75cbb52013-10-24 12:07:03 +02002869 if (nb_sectors <= 0) {
2870 return 0;
2871 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002872 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002873 if (ret < 0) {
2874 error_report("error getting block status at sector %" PRId64 ": %s",
2875 sector_num, strerror(-ret));
2876 return ret;
2877 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002878 if (ret & BDRV_BLOCK_ZERO) {
2879 sector_num += n;
2880 continue;
2881 }
2882 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2883 if (ret < 0) {
2884 error_report("error writing zeroes at sector %" PRId64 ": %s",
2885 sector_num, strerror(-ret));
2886 return ret;
2887 }
2888 sector_num += n;
2889 }
2890}
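/* Example call (assumed, not from the original source): zero out a whole
 * device while allowing the driver to unmap the zeroed ranges if it can.
 *
 *     int ret = bdrv_make_zero(bs, BDRV_REQ_MAY_UNMAP);
 *     if (ret < 0) {
 *         error_report("bdrv_make_zero failed: %s", strerror(-ret));
 *     }
 */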
2891
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002892int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002893{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002894 QEMUIOVector qiov;
2895 struct iovec iov = {
2896 .iov_base = (void *)buf,
2897 .iov_len = bytes,
2898 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002899 int ret;
bellard83f64092006-08-01 16:21:11 +00002900
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002901 if (bytes < 0) {
2902 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002903 }
2904
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002905 qemu_iovec_init_external(&qiov, &iov, 1);
2906 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2907 if (ret < 0) {
2908 return ret;
bellard83f64092006-08-01 16:21:11 +00002909 }
2910
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002911 return bytes;
bellard83f64092006-08-01 16:21:11 +00002912}
2913
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002914int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002915{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002916 int ret;
bellard83f64092006-08-01 16:21:11 +00002917
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002918 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2919 if (ret < 0) {
2920 return ret;
bellard83f64092006-08-01 16:21:11 +00002921 }
2922
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002923 return qiov->size;
2924}
2925
2926int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002927 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002928{
2929 QEMUIOVector qiov;
2930 struct iovec iov = {
2931 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002932 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002933 };
2934
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002935 if (bytes < 0) {
2936 return -EINVAL;
2937 }
2938
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002939 qemu_iovec_init_external(&qiov, &iov, 1);
2940 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002941}
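/* Byte-level wrappers, usage sketch (illustrative): both helpers return the
 * number of bytes transferred on success and a negative errno on failure.
 *
 *     uint8_t header[512];
 *     if (bdrv_pread(bs, 0, header, sizeof(header)) < 0) {
 *         return -EIO;
 *     }
 *     if (bdrv_pwrite(bs, 0, header, sizeof(header)) < 0) {
 *         return -EIO;
 *     }
 */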
bellard83f64092006-08-01 16:21:11 +00002942
Kevin Wolff08145f2010-06-16 16:38:15 +02002943/*
2944 * Writes to the file and ensures that no writes are reordered across this
2945 * request (acts as a barrier)
2946 *
2947 * Returns 0 on success, -errno in error cases.
2948 */
2949int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2950 const void *buf, int count)
2951{
2952 int ret;
2953
2954 ret = bdrv_pwrite(bs, offset, buf, count);
2955 if (ret < 0) {
2956 return ret;
2957 }
2958
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002959 /* No flush needed for cache modes that already do it */
2960 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002961 bdrv_flush(bs);
2962 }
2963
2964 return 0;
2965}
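/* Typical use, sketched (not original code): metadata updates that must not
 * be reordered with later writes, such as an image header update, go through
 * bdrv_pwrite_sync() so that a flush is issued when the cache mode needs it.
 * 'header' is a hypothetical on-disk structure.
 *
 *     ret = bdrv_pwrite_sync(bs, 0, &header, sizeof(header));
 *     if (ret < 0) {
 *         return ret;
 *     }
 */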
2966
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002967static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002968 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2969{
2970 /* Perform I/O through a temporary buffer so that users who scribble over
2971 * their read buffer while the operation is in progress do not end up
2972 * modifying the image file. This is critical for zero-copy guest I/O
2973 * where anything might happen inside guest memory.
2974 */
2975 void *bounce_buffer;
2976
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002977 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002978 struct iovec iov;
2979 QEMUIOVector bounce_qiov;
2980 int64_t cluster_sector_num;
2981 int cluster_nb_sectors;
2982 size_t skip_bytes;
2983 int ret;
2984
 2985 /* Cover the entire cluster so no additional backing file I/O is required when
 2986 * allocating the cluster in the image file.
2987 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002988 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2989 &cluster_sector_num, &cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002990
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002991 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2992 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002993
2994 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002995 iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
2996 if (bounce_buffer == NULL) {
2997 ret = -ENOMEM;
2998 goto err;
2999 }
3000
Stefan Hajnocziab185922011-11-17 13:40:31 +00003001 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3002
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003003 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3004 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003005 if (ret < 0) {
3006 goto err;
3007 }
3008
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003009 if (drv->bdrv_co_write_zeroes &&
3010 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01003011 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003012 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003013 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003014 /* This does not change the data on the disk, it is not necessary
3015 * to flush even in cache=writethrough mode.
3016 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003017 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00003018 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003019 }
3020
Stefan Hajnocziab185922011-11-17 13:40:31 +00003021 if (ret < 0) {
3022 /* It might be okay to ignore write errors for guest requests. If this
3023 * is a deliberate copy-on-read then we don't want to ignore the error.
3024 * Simply report it in all cases.
3025 */
3026 goto err;
3027 }
3028
3029 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04003030 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3031 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003032
3033err:
3034 qemu_vfree(bounce_buffer);
3035 return ret;
3036}
3037
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003038/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003039 * Forwards an already correctly aligned request to the BlockDriver. This
3040 * handles copy on read and zeroing after EOF; any other features must be
3041 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003042 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003043static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003044 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01003045 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02003046{
3047 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003048 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003049
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003050 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3051 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003052
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003053 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3054 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003055 assert(!qiov || bytes == qiov->size);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003056
3057 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003058 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01003059 /* If we touch the same cluster it counts as an overlap. This
3060 * guarantees that allocating writes will be serialized and not race
3061 * with each other for the same cluster. For example, in copy-on-read
3062 * it ensures that the CoR read and write operations are atomic and
3063 * guest writes cannot interleave between them. */
3064 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003065 }
3066
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01003067 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00003068
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003069 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00003070 int pnum;
3071
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003072 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003073 if (ret < 0) {
3074 goto out;
3075 }
3076
3077 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003078 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003079 goto out;
3080 }
3081 }
3082
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003083 /* Forward the request to the BlockDriver */
Max Reitzc0191e72015-02-05 13:58:24 -05003084 if (!bs->zero_beyond_eof) {
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003085 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3086 } else {
Max Reitzc0191e72015-02-05 13:58:24 -05003087 /* Read zeros after EOF */
Markus Armbruster40490822014-06-26 13:23:19 +02003088 int64_t total_sectors, max_nb_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003089
Markus Armbruster40490822014-06-26 13:23:19 +02003090 total_sectors = bdrv_nb_sectors(bs);
3091 if (total_sectors < 0) {
3092 ret = total_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003093 goto out;
3094 }
3095
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01003096 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3097 align >> BDRV_SECTOR_BITS);
Paolo Bonzinie012b782014-12-17 16:09:59 +01003098 if (nb_sectors < max_nb_sectors) {
3099 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3100 } else if (max_nb_sectors > 0) {
Kevin Wolf33f461e2014-07-03 13:21:24 +02003101 QEMUIOVector local_qiov;
Kevin Wolf33f461e2014-07-03 13:21:24 +02003102
3103 qemu_iovec_init(&local_qiov, qiov->niov);
3104 qemu_iovec_concat(&local_qiov, qiov, 0,
Paolo Bonzinie012b782014-12-17 16:09:59 +01003105 max_nb_sectors * BDRV_SECTOR_SIZE);
Kevin Wolf33f461e2014-07-03 13:21:24 +02003106
Paolo Bonzinie012b782014-12-17 16:09:59 +01003107 ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
Kevin Wolf33f461e2014-07-03 13:21:24 +02003108 &local_qiov);
3109
3110 qemu_iovec_destroy(&local_qiov);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003111 } else {
3112 ret = 0;
3113 }
3114
3115 /* Reading beyond end of file is supposed to produce zeroes */
3116 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3117 uint64_t offset = MAX(0, total_sectors - sector_num);
3118 uint64_t bytes = (sector_num + nb_sectors - offset) *
3119 BDRV_SECTOR_SIZE;
3120 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3121 }
3122 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00003123
3124out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003125 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003126}
3127
Fam Zhengfc3959e2015-03-24 09:23:49 +08003128static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3129{
3130 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3131 return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3132}
3133
3134static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3135 int64_t offset, size_t bytes)
3136{
3137 int64_t align = bdrv_get_align(bs);
3138 return !(offset & (align - 1) || (bytes & (align - 1)));
3139}
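/* Worked example (illustrative): with request_alignment = 4096,
 * bdrv_get_align() returns 4096, so:
 *
 *     offset = 8192, bytes = 4096:  8192 & 4095 == 0, 4096 & 4095 == 0 -> aligned
 *     offset = 8192, bytes =  512:  8192 & 4095 == 0,  512 & 4095 != 0 -> unaligned
 */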
3140
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003141/*
3142 * Handle a read request in coroutine context
3143 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003144static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
3145 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003146 BdrvRequestFlags flags)
3147{
3148 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01003149 BdrvTrackedRequest req;
3150
Fam Zhengfc3959e2015-03-24 09:23:49 +08003151 uint64_t align = bdrv_get_align(bs);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003152 uint8_t *head_buf = NULL;
3153 uint8_t *tail_buf = NULL;
3154 QEMUIOVector local_qiov;
3155 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003156 int ret;
3157
3158 if (!drv) {
3159 return -ENOMEDIUM;
3160 }
Max Reitzb9c64942015-02-05 13:58:25 -05003161
3162 ret = bdrv_check_byte_request(bs, offset, bytes);
3163 if (ret < 0) {
3164 return ret;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003165 }
3166
3167 if (bs->copy_on_read) {
3168 flags |= BDRV_REQ_COPY_ON_READ;
3169 }
3170
3171 /* throttling disk I/O */
3172 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003173 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003174 }
3175
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003176 /* Align read if necessary by padding qiov */
3177 if (offset & (align - 1)) {
3178 head_buf = qemu_blockalign(bs, align);
3179 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3180 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3181 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3182 use_local_qiov = true;
3183
3184 bytes += offset & (align - 1);
3185 offset = offset & ~(align - 1);
3186 }
3187
3188 if ((offset + bytes) & (align - 1)) {
3189 if (!use_local_qiov) {
3190 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3191 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3192 use_local_qiov = true;
3193 }
3194 tail_buf = qemu_blockalign(bs, align);
3195 qemu_iovec_add(&local_qiov, tail_buf,
3196 align - ((offset + bytes) & (align - 1)));
3197
3198 bytes = ROUND_UP(bytes, align);
3199 }
3200
Kevin Wolf65afd212013-12-03 14:55:55 +01003201 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003202 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003203 use_local_qiov ? &local_qiov : qiov,
3204 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003205 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003206
3207 if (use_local_qiov) {
3208 qemu_iovec_destroy(&local_qiov);
3209 qemu_vfree(head_buf);
3210 qemu_vfree(tail_buf);
3211 }
3212
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003213 return ret;
3214}
3215
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003216static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3217 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3218 BdrvRequestFlags flags)
3219{
Peter Lieven75af1f32015-02-06 11:54:11 +01003220 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003221 return -EINVAL;
3222 }
3223
3224 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3225 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3226}
3227
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003228int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003229 int nb_sectors, QEMUIOVector *qiov)
3230{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003231 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003232
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003233 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3234}
3235
3236int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3237 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3238{
3239 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3240
3241 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3242 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003243}
3244
Peter Lieven98764152015-02-02 15:48:34 +01003245#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
Peter Lievenc31cb702013-10-24 12:06:58 +02003246
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003247static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003248 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003249{
3250 BlockDriver *drv = bs->drv;
3251 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003252 struct iovec iov = {0};
3253 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003254
Peter Lieven75af1f32015-02-06 11:54:11 +01003255 int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
3256 BDRV_REQUEST_MAX_SECTORS);
Kevin Wolf621f0582012-03-20 15:12:58 +01003257
Peter Lievenc31cb702013-10-24 12:06:58 +02003258 while (nb_sectors > 0 && !ret) {
3259 int num = nb_sectors;
3260
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003261 /* Align request. Block drivers can expect the "bulk" of the request
3262 * to be aligned.
3263 */
3264 if (bs->bl.write_zeroes_alignment
3265 && num > bs->bl.write_zeroes_alignment) {
3266 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3267 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003268 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003269 num -= sector_num % bs->bl.write_zeroes_alignment;
3270 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3271 /* Shorten the request to the last aligned sector. num cannot
3272 * underflow because num > bs->bl.write_zeroes_alignment.
3273 */
3274 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003275 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003276 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003277
3278 /* limit request size */
3279 if (num > max_write_zeroes) {
3280 num = max_write_zeroes;
3281 }
3282
3283 ret = -ENOTSUP;
3284 /* First try the efficient write zeroes operation */
3285 if (drv->bdrv_co_write_zeroes) {
3286 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3287 }
3288
3289 if (ret == -ENOTSUP) {
3290 /* Fall back to bounce buffer if write zeroes is unsupported */
Peter Lieven095e4fa2015-01-05 12:29:49 +01003291 int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
Peter Lieven98764152015-02-02 15:48:34 +01003292 MAX_WRITE_ZEROES_BOUNCE_BUFFER);
Peter Lieven095e4fa2015-01-05 12:29:49 +01003293 num = MIN(num, max_xfer_len);
Peter Lievenc31cb702013-10-24 12:06:58 +02003294 iov.iov_len = num * BDRV_SECTOR_SIZE;
3295 if (iov.iov_base == NULL) {
Kevin Wolf857d4f42014-05-20 13:16:51 +02003296 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3297 if (iov.iov_base == NULL) {
3298 ret = -ENOMEM;
3299 goto fail;
3300 }
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003301 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003302 }
3303 qemu_iovec_init_external(&qiov, &iov, 1);
3304
3305 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003306
3307 /* Keep bounce buffer around if it is big enough for all
3308 * all future requests.
3309 */
Peter Lieven095e4fa2015-01-05 12:29:49 +01003310 if (num < max_xfer_len) {
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003311 qemu_vfree(iov.iov_base);
3312 iov.iov_base = NULL;
3313 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003314 }
3315
3316 sector_num += num;
3317 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003318 }
3319
Kevin Wolf857d4f42014-05-20 13:16:51 +02003320fail:
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003321 qemu_vfree(iov.iov_base);
3322 return ret;
3323}
3324
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003325/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003326 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003327 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003328static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003329 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3330 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003331{
3332 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003333 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003334 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003335
Kevin Wolfb404f722013-12-03 14:02:23 +01003336 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3337 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003338
Kevin Wolfb404f722013-12-03 14:02:23 +01003339 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3340 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003341 assert(!qiov || bytes == qiov->size);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003342
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003343 waited = wait_serialising_requests(req);
3344 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003345 assert(req->overlap_offset <= offset);
3346 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003347
Kevin Wolf65afd212013-12-03 14:55:55 +01003348 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003349
Peter Lieven465bee12014-05-18 00:58:19 +02003350 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3351 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3352 qemu_iovec_is_zero(qiov)) {
3353 flags |= BDRV_REQ_ZERO_WRITE;
3354 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3355 flags |= BDRV_REQ_MAY_UNMAP;
3356 }
3357 }
3358
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003359 if (ret < 0) {
3360 /* Do nothing, write notifier decided to fail this request */
3361 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003362 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003363 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003364 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003365 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003366 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3367 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003368 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003369
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003370 if (ret == 0 && !bs->enable_write_cache) {
3371 ret = bdrv_co_flush(bs);
3372 }
3373
Fam Zhenge4654d22013-11-13 18:29:43 +08003374 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003375
Benoît Canet5366d0c2014-09-05 15:46:18 +02003376 block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
Benoît Canet5e5a94b2014-09-05 15:46:16 +02003377
Max Reitzc0191e72015-02-05 13:58:24 -05003378 if (ret >= 0) {
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003379 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3380 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003381
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003382 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003383}
3384
Kevin Wolfb404f722013-12-03 14:02:23 +01003385/*
3386 * Handle a write request in coroutine context
3387 */
Kevin Wolf66015532013-12-03 14:40:18 +01003388static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3389 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003390 BdrvRequestFlags flags)
3391{
Kevin Wolf65afd212013-12-03 14:55:55 +01003392 BdrvTrackedRequest req;
Fam Zhengfc3959e2015-03-24 09:23:49 +08003393 uint64_t align = bdrv_get_align(bs);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003394 uint8_t *head_buf = NULL;
3395 uint8_t *tail_buf = NULL;
3396 QEMUIOVector local_qiov;
3397 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003398 int ret;
3399
3400 if (!bs->drv) {
3401 return -ENOMEDIUM;
3402 }
3403 if (bs->read_only) {
3404 return -EACCES;
3405 }
Max Reitzb9c64942015-02-05 13:58:25 -05003406
3407 ret = bdrv_check_byte_request(bs, offset, bytes);
3408 if (ret < 0) {
3409 return ret;
Kevin Wolfb404f722013-12-03 14:02:23 +01003410 }
3411
Kevin Wolfb404f722013-12-03 14:02:23 +01003412 /* throttling disk I/O */
3413 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003414 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003415 }
3416
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003417 /*
3418 * Align write if necessary by performing a read-modify-write cycle.
3419 * Pad qiov with the read parts and be sure to have a tracked request not
3420 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3421 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003422 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003423
3424 if (offset & (align - 1)) {
3425 QEMUIOVector head_qiov;
3426 struct iovec head_iov;
3427
3428 mark_request_serialising(&req, align);
3429 wait_serialising_requests(&req);
3430
3431 head_buf = qemu_blockalign(bs, align);
3432 head_iov = (struct iovec) {
3433 .iov_base = head_buf,
3434 .iov_len = align,
3435 };
3436 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3437
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003438 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003439 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3440 align, &head_qiov, 0);
3441 if (ret < 0) {
3442 goto fail;
3443 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003444 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003445
3446 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3447 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3448 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3449 use_local_qiov = true;
3450
3451 bytes += offset & (align - 1);
3452 offset = offset & ~(align - 1);
3453 }
3454
3455 if ((offset + bytes) & (align - 1)) {
3456 QEMUIOVector tail_qiov;
3457 struct iovec tail_iov;
3458 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003459 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003460
3461 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003462 waited = wait_serialising_requests(&req);
3463 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003464
3465 tail_buf = qemu_blockalign(bs, align);
3466 tail_iov = (struct iovec) {
3467 .iov_base = tail_buf,
3468 .iov_len = align,
3469 };
3470 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3471
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003472 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003473 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3474 align, &tail_qiov, 0);
3475 if (ret < 0) {
3476 goto fail;
3477 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003478 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003479
3480 if (!use_local_qiov) {
3481 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3482 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3483 use_local_qiov = true;
3484 }
3485
3486 tail_bytes = (offset + bytes) & (align - 1);
3487 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3488
3489 bytes = ROUND_UP(bytes, align);
3490 }
3491
Fam Zhengfc3959e2015-03-24 09:23:49 +08003492 if (use_local_qiov) {
3493 /* Local buffer may have non-zero data. */
3494 flags &= ~BDRV_REQ_ZERO_WRITE;
3495 }
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003496 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3497 use_local_qiov ? &local_qiov : qiov,
3498 flags);
3499
3500fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003501 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003502
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003503 if (use_local_qiov) {
3504 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003505 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003506 qemu_vfree(head_buf);
3507 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003508
Kevin Wolfb404f722013-12-03 14:02:23 +01003509 return ret;
3510}
3511
Kevin Wolf66015532013-12-03 14:40:18 +01003512static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3513 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3514 BdrvRequestFlags flags)
3515{
Peter Lieven75af1f32015-02-06 11:54:11 +01003516 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf66015532013-12-03 14:40:18 +01003517 return -EINVAL;
3518 }
3519
3520 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3521 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3522}
3523
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003524int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3525 int nb_sectors, QEMUIOVector *qiov)
3526{
3527 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3528
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003529 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3530}
3531
3532int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003533 int64_t sector_num, int nb_sectors,
3534 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003535{
Fam Zhengfc3959e2015-03-24 09:23:49 +08003536 int ret;
3537
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003538 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003539
Peter Lievend32f35c2013-10-24 12:06:52 +02003540 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3541 flags &= ~BDRV_REQ_MAY_UNMAP;
3542 }
Fam Zhengfc3959e2015-03-24 09:23:49 +08003543 if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
3544 nb_sectors << BDRV_SECTOR_BITS)) {
3545 ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3546 BDRV_REQ_ZERO_WRITE | flags);
3547 } else {
3548 uint8_t *buf;
3549 QEMUIOVector local_qiov;
3550 size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
Peter Lievend32f35c2013-10-24 12:06:52 +02003551
Fam Zhengfc3959e2015-03-24 09:23:49 +08003552 buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
3553 memset(buf, 0, bytes);
3554 qemu_iovec_init(&local_qiov, 1);
3555 qemu_iovec_add(&local_qiov, buf, bytes);
3556
3557 ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
3558 BDRV_REQ_ZERO_WRITE | flags);
3559 qemu_vfree(buf);
3560 }
3561 return ret;
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003562}
3563
bellard83f64092006-08-01 16:21:11 +00003564/**
bellard83f64092006-08-01 16:21:11 +00003565 * Truncate file to 'offset' bytes (needed only for file protocols)
3566 */
3567int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3568{
3569 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003570 int ret;
bellard83f64092006-08-01 16:21:11 +00003571 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003572 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003573 if (!drv->bdrv_truncate)
3574 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003575 if (bs->read_only)
3576 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003577
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003578 ret = drv->bdrv_truncate(bs, offset);
3579 if (ret == 0) {
3580 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003581 if (bs->blk) {
3582 blk_dev_resize_cb(bs->blk);
3583 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003584 }
3585 return ret;
bellard83f64092006-08-01 16:21:11 +00003586}
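/* Example call (assumed values): grow an image to 1 GiB; the refresh of
 * total_sectors and the blk resize callback happen inside bdrv_truncate()
 * itself, as shown above.
 *
 *     ret = bdrv_truncate(bs, 1024 * 1024 * 1024LL);
 *     if (ret < 0) {
 *         error_report("truncate failed: %s", strerror(-ret));
 *     }
 */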
3587
3588/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003589 * Length of an allocated file in bytes. Sparse files are counted by actual
 3590 * allocated space. Returns < 0 on error or if unknown.
3591 */
3592int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3593{
3594 BlockDriver *drv = bs->drv;
3595 if (!drv) {
3596 return -ENOMEDIUM;
3597 }
3598 if (drv->bdrv_get_allocated_file_size) {
3599 return drv->bdrv_get_allocated_file_size(bs);
3600 }
3601 if (bs->file) {
3602 return bdrv_get_allocated_file_size(bs->file);
3603 }
3604 return -ENOTSUP;
3605}
3606
3607/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003608 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003609 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003610int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003611{
3612 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003613
bellard83f64092006-08-01 16:21:11 +00003614 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003615 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003616
Kevin Wolfb94a2612013-10-29 12:18:58 +01003617 if (drv->has_variable_length) {
3618 int ret = refresh_total_sectors(bs, bs->total_sectors);
3619 if (ret < 0) {
3620 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003621 }
bellard83f64092006-08-01 16:21:11 +00003622 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003623 return bs->total_sectors;
3624}
3625
3626/**
3627 * Return length in bytes on success, -errno on error.
3628 * The length is always a multiple of BDRV_SECTOR_SIZE.
3629 */
3630int64_t bdrv_getlength(BlockDriverState *bs)
3631{
3632 int64_t ret = bdrv_nb_sectors(bs);
3633
3634 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003635}
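/* Relationship sketch (illustrative): bdrv_getlength() is bdrv_nb_sectors()
 * scaled by BDRV_SECTOR_SIZE. For a 1 GiB image:
 *
 *     int64_t sectors = bdrv_nb_sectors(bs);   // 2097152
 *     int64_t bytes   = bdrv_getlength(bs);    // 2097152 * 512 = 1073741824
 */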
3636
bellard19cb3732006-08-19 11:45:59 +00003637/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003638void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003639{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003640 int64_t nb_sectors = bdrv_nb_sectors(bs);
3641
3642 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003643}
bellardcf989512004-02-16 21:56:36 +00003644
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003645void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3646 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003647{
3648 bs->on_read_error = on_read_error;
3649 bs->on_write_error = on_write_error;
3650}
3651
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003652BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003653{
3654 return is_read ? bs->on_read_error : bs->on_write_error;
3655}
3656
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003657BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3658{
3659 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3660
3661 switch (on_err) {
3662 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003663 return (error == ENOSPC) ?
3664 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003665 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003666 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003667 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003668 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003669 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003670 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003671 default:
3672 abort();
3673 }
3674}
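/* Sketch of how a device model might consume this mapping (hypothetical
 * caller, not from this file): pick the action first, then report it via
 * bdrv_error_action() as described further below; 'ret' is a negative errno.
 *
 *     BlockErrorAction action = bdrv_get_error_action(bs, is_read, -ret);
 *     bdrv_error_action(bs, action, is_read, -ret);
 *     if (action == BLOCK_ERROR_ACTION_STOP) {
 *         // the request is expected to be retried once the VM resumes
 *     }
 */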
3675
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003676static void send_qmp_error_event(BlockDriverState *bs,
3677 BlockErrorAction action,
3678 bool is_read, int error)
3679{
Peter Maydell573742a2014-10-10 20:33:03 +01003680 IoOperationType optype;
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003681
Peter Maydell573742a2014-10-10 20:33:03 +01003682 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3683 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003684 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003685 error == ENOSPC, strerror(error),
3686 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003687}
3688
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003689/* This is done by device models because, while the block layer knows
3690 * about the error, it does not know whether an operation comes from
3691 * the device or the block layer (from a job, for example).
3692 */
3693void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3694 bool is_read, int error)
3695{
3696 assert(error >= 0);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003697
Wenchao Xiaa5895692014-06-18 08:43:30 +02003698 if (action == BLOCK_ERROR_ACTION_STOP) {
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003699 /* First set the iostatus, so that "info block" returns an iostatus
3700 * that matches the events raised so far (an additional error iostatus
3701 * is fine, but not a lost one).
3702 */
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003703 bdrv_iostatus_set_err(bs, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003704
3705 /* Then raise the request to stop the VM and the event.
3706 * qemu_system_vmstop_request_prepare has two effects. First,
3707 * it ensures that the STOP event always comes after the
3708 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3709 * can observe the STOP event and do a "cont" before the STOP
3710 * event is issued, the VM will not stop. In this case, vm_start()
3711 * also ensures that the STOP/RESUME pair of events is emitted.
3712 */
3713 qemu_system_vmstop_request_prepare();
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003714 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003715 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
3716 } else {
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003717 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003718 }
3719}
3720
bellardb3380822004-03-14 21:38:54 +00003721int bdrv_is_read_only(BlockDriverState *bs)
3722{
3723 return bs->read_only;
3724}
3725
ths985a03b2007-12-24 16:10:43 +00003726int bdrv_is_sg(BlockDriverState *bs)
3727{
3728 return bs->sg;
3729}
3730
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003731int bdrv_enable_write_cache(BlockDriverState *bs)
3732{
3733 return bs->enable_write_cache;
3734}
3735
Paolo Bonzini425b0142012-06-06 00:04:52 +02003736void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3737{
3738 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003739
3740 /* so a reopen() will preserve wce */
3741 if (wce) {
3742 bs->open_flags |= BDRV_O_CACHE_WB;
3743 } else {
3744 bs->open_flags &= ~BDRV_O_CACHE_WB;
3745 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003746}
3747
bellardea2384d2004-08-01 21:59:26 +00003748int bdrv_is_encrypted(BlockDriverState *bs)
3749{
3750 if (bs->backing_hd && bs->backing_hd->encrypted)
3751 return 1;
3752 return bs->encrypted;
3753}
3754
aliguoric0f4ce72009-03-05 23:01:01 +00003755int bdrv_key_required(BlockDriverState *bs)
3756{
3757 BlockDriverState *backing_hd = bs->backing_hd;
3758
3759 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3760 return 1;
3761 return (bs->encrypted && !bs->valid_key);
3762}
3763
bellardea2384d2004-08-01 21:59:26 +00003764int bdrv_set_key(BlockDriverState *bs, const char *key)
3765{
3766 int ret;
3767 if (bs->backing_hd && bs->backing_hd->encrypted) {
3768 ret = bdrv_set_key(bs->backing_hd, key);
3769 if (ret < 0)
3770 return ret;
3771 if (!bs->encrypted)
3772 return 0;
3773 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003774 if (!bs->encrypted) {
3775 return -EINVAL;
3776 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3777 return -ENOMEDIUM;
3778 }
aliguoric0f4ce72009-03-05 23:01:01 +00003779 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003780 if (ret < 0) {
3781 bs->valid_key = 0;
3782 } else if (!bs->valid_key) {
3783 bs->valid_key = 1;
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003784 if (bs->blk) {
3785 /* call the change callback now, we skipped it on open */
3786 blk_dev_change_media_cb(bs->blk, true);
3787 }
aliguoribb5fc202009-03-05 23:01:15 +00003788 }
aliguoric0f4ce72009-03-05 23:01:01 +00003789 return ret;
bellardea2384d2004-08-01 21:59:26 +00003790}
3791
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003792/*
3793 * Provide an encryption key for @bs.
3794 * If @key is non-null:
3795 * If @bs is not encrypted, fail.
3796 * Else if the key is invalid, fail.
3797 * Else set @bs's key to @key, replacing the existing key, if any.
3798 * If @key is null:
3799 * If @bs is encrypted and still lacks a key, fail.
3800 * Else do nothing.
3801 * On failure, store an error object through @errp if non-null.
3802 */
3803void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
3804{
3805 if (key) {
3806 if (!bdrv_is_encrypted(bs)) {
Markus Armbrusterb1ca6392015-01-29 10:37:01 +01003807 error_setg(errp, "Device '%s' is not encrypted",
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003808 bdrv_get_device_name(bs));
3809 } else if (bdrv_set_key(bs, key) < 0) {
3810 error_set(errp, QERR_INVALID_PASSWORD);
3811 }
3812 } else {
3813 if (bdrv_key_required(bs)) {
Markus Armbrusterb1ca6392015-01-29 10:37:01 +01003814 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3815 "'%s' (%s) is encrypted",
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003816 bdrv_get_device_name(bs),
3817 bdrv_get_encrypted_filename(bs));
3818 }
3819 }
3820}
3821
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003822const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003823{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003824 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003825}
3826
Stefan Hajnocziada42402014-08-27 12:08:55 +01003827static int qsort_strcmp(const void *a, const void *b)
3828{
3829 return strcmp(a, b);
3830}
3831
ths5fafdf22007-09-16 21:08:06 +00003832void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003833 void *opaque)
3834{
3835 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003836 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003837 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003838 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003839
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003840 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003841 if (drv->format_name) {
3842 bool found = false;
3843 int i = count;
3844 while (formats && i && !found) {
3845 found = !strcmp(formats[--i], drv->format_name);
3846 }
3847
3848 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003849 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003850 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003851 }
3852 }
bellardea2384d2004-08-01 21:59:26 +00003853 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003854
3855 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3856
3857 for (i = 0; i < count; i++) {
3858 it(opaque, formats[i]);
3859 }
3860
Jeff Codye855e4f2014-04-28 18:29:54 -04003861 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003862}
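/* Usage sketch (illustrative): printing the sorted format names through the
 * callback; 'print_format' is a hypothetical helper.
 *
 *     static void print_format(void *opaque, const char *name)
 *     {
 *         printf("%s\n", name);
 *     }
 *
 *     bdrv_iterate_format(print_format, NULL);
 */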
3863
Benoît Canetdc364f42014-01-23 21:31:32 +01003864/* This function is to find a node in the bs graph */
3865BlockDriverState *bdrv_find_node(const char *node_name)
3866{
3867 BlockDriverState *bs;
3868
3869 assert(node_name);
3870
3871 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3872 if (!strcmp(node_name, bs->node_name)) {
3873 return bs;
3874 }
3875 }
3876 return NULL;
3877}
3878
Benoît Canetc13163f2014-01-23 21:31:34 +01003879/* Put this QMP function here so it can access the static graph_bdrv_states. */
3880BlockDeviceInfoList *bdrv_named_nodes_list(void)
3881{
3882 BlockDeviceInfoList *list, *entry;
3883 BlockDriverState *bs;
3884
3885 list = NULL;
3886 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3887 entry = g_malloc0(sizeof(*entry));
3888 entry->value = bdrv_block_device_info(bs);
3889 entry->next = list;
3890 list = entry;
3891 }
3892
3893 return list;
3894}
3895
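/*
 * Editor's illustrative sketch (not part of the original code, kept out of
 * the build with #if 0): consuming the list returned above and releasing it
 * with the generated QAPI free function. The example_* name is hypothetical.
 */
#if 0
static void example_dump_named_nodes(void)
{
    BlockDeviceInfoList *list = bdrv_named_nodes_list();
    BlockDeviceInfoList *entry;

    for (entry = list; entry; entry = entry->next) {
        /* entry->value is a BlockDeviceInfo describing one named node */
    }
    qapi_free_BlockDeviceInfoList(list);
}
#endif
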
Benoît Canet12d3ba82014-01-23 21:31:35 +01003896BlockDriverState *bdrv_lookup_bs(const char *device,
3897 const char *node_name,
3898 Error **errp)
3899{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003900 BlockBackend *blk;
3901 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003902
Benoît Canet12d3ba82014-01-23 21:31:35 +01003903 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02003904 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003905
Markus Armbruster7f06d472014-10-07 13:59:12 +02003906 if (blk) {
3907 return blk_bs(blk);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003908 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003909 }
3910
Benoît Canetdd67fa52014-02-12 17:15:06 +01003911 if (node_name) {
3912 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003913
Benoît Canetdd67fa52014-02-12 17:15:06 +01003914 if (bs) {
3915 return bs;
3916 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003917 }
3918
Benoît Canetdd67fa52014-02-12 17:15:06 +01003919 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3920 device ? device : "",
3921 node_name ? node_name : "");
3922 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003923}
3924
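/*
 * Editor's illustrative sketch (not part of the original code, kept out of
 * the build with #if 0): resolving a BDS from either a device name or a node
 * name, the way QMP commands that accept both typically do. The example_*
 * name is hypothetical.
 */
#if 0
static BlockDriverState *example_lookup(const char *device,
                                        const char *node_name)
{
    Error *local_err = NULL;
    BlockDriverState *bs = bdrv_lookup_bs(device, node_name, &local_err);

    if (!bs) {
        /* neither name matched; local_err describes the failure */
        error_free(local_err);
    }
    return bs;
}
#endif
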
Jeff Cody5a6684d2014-06-25 15:40:09 -04003925/* If 'base' is in the same chain as 'top', return true. Otherwise,
3926 * return false. If either argument is NULL, return false. */
3927bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3928{
3929 while (top && top != base) {
3930 top = top->backing_hd;
3931 }
3932
3933 return top != NULL;
3934}
3935
Fam Zheng04df7652014-10-31 11:32:54 +08003936BlockDriverState *bdrv_next_node(BlockDriverState *bs)
3937{
3938 if (!bs) {
3939 return QTAILQ_FIRST(&graph_bdrv_states);
3940 }
3941 return QTAILQ_NEXT(bs, node_list);
3942}
3943
Markus Armbruster2f399b02010-06-02 18:55:20 +02003944BlockDriverState *bdrv_next(BlockDriverState *bs)
3945{
3946 if (!bs) {
3947 return QTAILQ_FIRST(&bdrv_states);
3948 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003949 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003950}
3951
Fam Zheng20a9e772014-10-31 11:32:55 +08003952const char *bdrv_get_node_name(const BlockDriverState *bs)
3953{
3954 return bs->node_name;
3955}
3956
Markus Armbruster7f06d472014-10-07 13:59:12 +02003957/* TODO check what callers really want: bs->node_name or blk_name() */
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003958const char *bdrv_get_device_name(const BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003959{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003960 return bs->blk ? blk_name(bs->blk) : "";
bellardea2384d2004-08-01 21:59:26 +00003961}
3962
Markus Armbrusterc8433282012-06-05 16:49:24 +02003963int bdrv_get_flags(BlockDriverState *bs)
3964{
3965 return bs->open_flags;
3966}
3967
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003968int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003969{
3970 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003971 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003972
Benoît Canetdc364f42014-01-23 21:31:32 +01003973 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003974 AioContext *aio_context = bdrv_get_aio_context(bs);
3975 int ret;
3976
3977 aio_context_acquire(aio_context);
3978 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003979 if (ret < 0 && !result) {
3980 result = ret;
3981 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003982 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003983 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003984
3985 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003986}
3987
Peter Lieven3ac21622013-06-28 12:47:42 +02003988int bdrv_has_zero_init_1(BlockDriverState *bs)
3989{
3990 return 1;
3991}
3992
Kevin Wolff2feebb2010-04-14 17:30:35 +02003993int bdrv_has_zero_init(BlockDriverState *bs)
3994{
3995 assert(bs->drv);
3996
Paolo Bonzini11212d82013-09-04 19:00:27 +02003997 /* If BS is a copy on write image, it is initialized to
3998 the contents of the base image, which may not be zeroes. */
3999 if (bs->backing_hd) {
4000 return 0;
4001 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02004002 if (bs->drv->bdrv_has_zero_init) {
4003 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02004004 }
4005
Peter Lieven3ac21622013-06-28 12:47:42 +02004006 /* safe default */
4007 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02004008}
4009
Peter Lieven4ce78692013-10-24 12:06:54 +02004010bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
4011{
4012 BlockDriverInfo bdi;
4013
4014 if (bs->backing_hd) {
4015 return false;
4016 }
4017
4018 if (bdrv_get_info(bs, &bdi) == 0) {
4019 return bdi.unallocated_blocks_are_zero;
4020 }
4021
4022 return false;
4023}
4024
4025bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
4026{
4027 BlockDriverInfo bdi;
4028
4029 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
4030 return false;
4031 }
4032
4033 if (bdrv_get_info(bs, &bdi) == 0) {
4034 return bdi.can_write_zeroes_with_unmap;
4035 }
4036
4037 return false;
4038}
4039
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004040typedef struct BdrvCoGetBlockStatusData {
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004041 BlockDriverState *bs;
Miroslav Rezaninab35b2bb2013-02-13 09:09:39 +01004042 BlockDriverState *base;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004043 int64_t sector_num;
4044 int nb_sectors;
4045 int *pnum;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004046 int64_t ret;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004047 bool done;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004048} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004049
thsf58c7b32008-06-05 21:53:49 +00004050/*
Fam Zheng705be722014-11-10 17:10:38 +08004051 * Returns the allocation status of the specified sectors.
4052 * Drivers not implementing the functionality are assumed to not support
4053 * backing files, hence all their sectors are reported as allocated.
thsf58c7b32008-06-05 21:53:49 +00004054 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004055 * If 'sector_num' is beyond the end of the disk image the return value is 0
4056 * and 'pnum' is set to 0.
4057 *
thsf58c7b32008-06-05 21:53:49 +00004058 * 'pnum' is set to the number of sectors (including and immediately following
4059 * the specified sector) that are known to be in the same
4060 * allocated/unallocated state.
4061 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004062 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
4063 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00004064 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004065static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4066 int64_t sector_num,
4067 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00004068{
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004069 int64_t total_sectors;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004070 int64_t n;
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004071 int64_t ret, ret2;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004072
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004073 total_sectors = bdrv_nb_sectors(bs);
4074 if (total_sectors < 0) {
4075 return total_sectors;
Paolo Bonzini617ccb42013-09-04 19:00:23 +02004076 }
4077
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004078 if (sector_num >= total_sectors) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004079 *pnum = 0;
4080 return 0;
4081 }
4082
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004083 n = total_sectors - sector_num;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004084 if (n < nb_sectors) {
4085 nb_sectors = n;
4086 }
4087
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004088 if (!bs->drv->bdrv_co_get_block_status) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004089 *pnum = nb_sectors;
Kevin Wolfe88ae222014-05-06 15:25:36 +02004090 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
Paolo Bonzini918e92d2013-09-04 19:00:37 +02004091 if (bs->drv->protocol_name) {
4092 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4093 }
4094 return ret;
thsf58c7b32008-06-05 21:53:49 +00004095 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004096
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004097 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4098 if (ret < 0) {
Peter Lieven3e0a2332013-09-24 15:35:08 +02004099 *pnum = 0;
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004100 return ret;
4101 }
4102
Peter Lieven92bc50a2013-10-08 14:43:14 +02004103 if (ret & BDRV_BLOCK_RAW) {
4104 assert(ret & BDRV_BLOCK_OFFSET_VALID);
4105 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4106 *pnum, pnum);
4107 }
4108
Kevin Wolfe88ae222014-05-06 15:25:36 +02004109 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4110 ret |= BDRV_BLOCK_ALLOCATED;
4111 }
4112
Peter Lievenc3d86882013-10-24 12:07:04 +02004113 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4114 if (bdrv_unallocated_blocks_are_zero(bs)) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004115 ret |= BDRV_BLOCK_ZERO;
Peter Lieven1f9db222013-09-24 15:35:09 +02004116 } else if (bs->backing_hd) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004117 BlockDriverState *bs2 = bs->backing_hd;
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004118 int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
4119 if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004120 ret |= BDRV_BLOCK_ZERO;
4121 }
4122 }
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004123 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004124
4125 if (bs->file &&
4126 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
4127 (ret & BDRV_BLOCK_OFFSET_VALID)) {
Max Reitz59c9a952014-10-22 17:00:15 +02004128 int file_pnum;
4129
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004130 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
Max Reitz59c9a952014-10-22 17:00:15 +02004131 *pnum, &file_pnum);
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004132 if (ret2 >= 0) {
4133            /* Ignore errors. This is just providing extra information; it
4134 * is useful but not necessary.
4135 */
Max Reitz59c9a952014-10-22 17:00:15 +02004136 if (!file_pnum) {
4137 /* !file_pnum indicates an offset at or beyond the EOF; it is
4138 * perfectly valid for the format block driver to point to such
4139 * offsets, so catch it and mark everything as zero */
4140 ret |= BDRV_BLOCK_ZERO;
4141 } else {
4142 /* Limit request to the range reported by the protocol driver */
4143 *pnum = file_pnum;
4144 ret |= (ret2 & BDRV_BLOCK_ZERO);
4145 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004146 }
4147 }
4148
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004149 return ret;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004150}
4151
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004152/* Coroutine wrapper for bdrv_get_block_status() */
4153static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004154{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004155 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004156 BlockDriverState *bs = data->bs;
4157
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004158 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4159 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004160 data->done = true;
4161}
4162
4163/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004164 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004165 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004166 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004167 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004168int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4169 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004170{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004171 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004172 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004173 .bs = bs,
4174 .sector_num = sector_num,
4175 .nb_sectors = nb_sectors,
4176 .pnum = pnum,
4177 .done = false,
4178 };
4179
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004180 if (qemu_in_coroutine()) {
4181 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004182 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004183 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004184 AioContext *aio_context = bdrv_get_aio_context(bs);
4185
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004186 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004187 qemu_coroutine_enter(co, &data);
4188 while (!data.done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004189 aio_poll(aio_context, true);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004190 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004191 }
4192 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00004193}
4194
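/*
 * Editor's illustrative sketch (not part of the original code, kept out of
 * the build with #if 0): walking an image in 'pnum'-sized runs and
 * inspecting the status flags of each run. The example_* name is
 * hypothetical.
 */
#if 0
static void example_walk_block_status(BlockDriverState *bs)
{
    int64_t total_sectors = bdrv_nb_sectors(bs);
    int64_t sector_num = 0;

    while (sector_num < total_sectors) {
        int nb_sectors = MIN(total_sectors - sector_num,
                             BDRV_REQUEST_MAX_SECTORS);
        int pnum;
        int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &pnum);

        if (ret < 0 || pnum == 0) {
            break;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            /* the run reads as zeroes */
        } else if (ret & BDRV_BLOCK_DATA) {
            /* the run is backed by data at this layer */
        }
        sector_num += pnum;
    }
}
#endif
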
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004195int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4196 int nb_sectors, int *pnum)
4197{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004198 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4199 if (ret < 0) {
4200 return ret;
4201 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004202 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004203}
4204
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004205/*
4206 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4207 *
4208 * Return true if the given sector is allocated in any image between
4209 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4210 * sector is allocated in any image of the chain. Return false otherwise.
4211 *
4212 * 'pnum' is set to the number of sectors (including and immediately following
4213 * the specified sector) that are known to be in the same
4214 * allocated/unallocated state.
4215 *
4216 */
Paolo Bonzini4f578632013-09-04 19:00:24 +02004217int bdrv_is_allocated_above(BlockDriverState *top,
4218 BlockDriverState *base,
4219 int64_t sector_num,
4220 int nb_sectors, int *pnum)
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004221{
4222 BlockDriverState *intermediate;
4223 int ret, n = nb_sectors;
4224
4225 intermediate = top;
4226 while (intermediate && intermediate != base) {
4227 int pnum_inter;
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004228 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4229 &pnum_inter);
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004230 if (ret < 0) {
4231 return ret;
4232 } else if (ret) {
4233 *pnum = pnum_inter;
4234 return 1;
4235 }
4236
4237 /*
4238 * [sector_num, nb_sectors] is unallocated on top but intermediate
4239 * might have
4240 *
4241 * [sector_num+x, nr_sectors] allocated.
4242 */
Vishvananda Ishaya63ba17d2013-01-24 10:02:08 -08004243 if (n > pnum_inter &&
4244 (intermediate == top ||
4245 sector_num + pnum_inter < intermediate->total_sectors)) {
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004246 n = pnum_inter;
4247 }
4248
4249 intermediate = intermediate->backing_hd;
4250 }
4251
4252 *pnum = n;
4253 return 0;
4254}
4255
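/*
 * Editor's illustrative sketch (not part of the original code, kept out of
 * the build with #if 0): asking whether the start of a range is supplied by
 * the chain above 'base', as commit/stream style code does. The example_*
 * name is hypothetical.
 */
#if 0
static bool example_range_allocated_above(BlockDriverState *top,
                                          BlockDriverState *base,
                                          int64_t sector_num, int nb_sectors)
{
    int pnum;
    int ret = bdrv_is_allocated_above(top, base, sector_num, nb_sectors,
                                      &pnum);

    /* ret > 0: at least the first 'pnum' sectors come from the images above
     * 'base'; ret == 0: they do not; ret < 0: error. */
    return ret > 0;
}
#endif
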
aliguori045df332009-03-05 23:00:48 +00004256const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4257{
4258 if (bs->backing_hd && bs->backing_hd->encrypted)
4259 return bs->backing_file;
4260 else if (bs->encrypted)
4261 return bs->filename;
4262 else
4263 return NULL;
4264}
4265
ths5fafdf22007-09-16 21:08:06 +00004266void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00004267 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00004268{
Kevin Wolf3574c602011-10-26 11:02:11 +02004269 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00004270}
4271
ths5fafdf22007-09-16 21:08:06 +00004272int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00004273 const uint8_t *buf, int nb_sectors)
4274{
4275 BlockDriver *drv = bs->drv;
Max Reitzb9c64942015-02-05 13:58:25 -05004276 int ret;
4277
4278 if (!drv) {
bellard19cb3732006-08-19 11:45:59 +00004279 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05004280 }
4281 if (!drv->bdrv_write_compressed) {
bellardfaea38e2006-08-05 21:31:00 +00004282 return -ENOTSUP;
Max Reitzb9c64942015-02-05 13:58:25 -05004283 }
4284 ret = bdrv_check_request(bs, sector_num, nb_sectors);
4285 if (ret < 0) {
4286 return ret;
4287 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004288
Fam Zhenge4654d22013-11-13 18:29:43 +08004289 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004290
bellardfaea38e2006-08-05 21:31:00 +00004291 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4292}
ths3b46e622007-09-17 08:09:54 +00004293
bellardfaea38e2006-08-05 21:31:00 +00004294int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4295{
4296 BlockDriver *drv = bs->drv;
4297 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004298 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004299 if (!drv->bdrv_get_info)
4300 return -ENOTSUP;
4301 memset(bdi, 0, sizeof(*bdi));
4302 return drv->bdrv_get_info(bs, bdi);
4303}
4304
Max Reitzeae041f2013-10-09 10:46:16 +02004305ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4306{
4307 BlockDriver *drv = bs->drv;
4308 if (drv && drv->bdrv_get_specific_info) {
4309 return drv->bdrv_get_specific_info(bs);
4310 }
4311 return NULL;
4312}
4313
Christoph Hellwig45566e92009-07-10 23:11:57 +02004314int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4315 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004316{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004317 QEMUIOVector qiov;
4318 struct iovec iov = {
4319 .iov_base = (void *) buf,
4320 .iov_len = size,
4321 };
4322
4323 qemu_iovec_init_external(&qiov, &iov, 1);
4324 return bdrv_writev_vmstate(bs, &qiov, pos);
4325}
4326
4327int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4328{
aliguori178e08a2009-04-05 19:10:55 +00004329 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004330
4331 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004332 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004333 } else if (drv->bdrv_save_vmstate) {
4334 return drv->bdrv_save_vmstate(bs, qiov, pos);
4335 } else if (bs->file) {
4336 return bdrv_writev_vmstate(bs->file, qiov, pos);
4337 }
4338
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004339 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004340}
4341
Christoph Hellwig45566e92009-07-10 23:11:57 +02004342int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4343 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004344{
4345 BlockDriver *drv = bs->drv;
4346 if (!drv)
4347 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004348 if (drv->bdrv_load_vmstate)
4349 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4350 if (bs->file)
4351 return bdrv_load_vmstate(bs->file, buf, pos, size);
4352 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004353}
4354
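/*
 * Editor's illustrative sketch (not part of the original code, kept out of
 * the build with #if 0): writing a small opaque blob into the image's
 * vmstate area and reading it back. The example_* name is hypothetical.
 */
#if 0
static int example_roundtrip_vmstate(BlockDriverState *bs)
{
    uint8_t out[16] = { 0x42 };
    uint8_t in[16];
    int ret;

    ret = bdrv_save_vmstate(bs, out, 0, sizeof(out));
    if (ret < 0) {
        return ret;
    }
    return bdrv_load_vmstate(bs, in, 0, sizeof(in));
}
#endif
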
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004355void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4356{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004357 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004358 return;
4359 }
4360
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004361 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004362}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004363
Kevin Wolf41c695c2012-12-06 14:32:58 +01004364int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4365 const char *tag)
4366{
4367 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4368 bs = bs->file;
4369 }
4370
4371 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4372 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4373 }
4374
4375 return -ENOTSUP;
4376}
4377
Fam Zheng4cc70e92013-11-20 10:01:54 +08004378int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4379{
4380 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4381 bs = bs->file;
4382 }
4383
4384 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4385 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4386 }
4387
4388 return -ENOTSUP;
4389}
4390
Kevin Wolf41c695c2012-12-06 14:32:58 +01004391int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4392{
Max Reitz938789e2014-03-10 23:44:08 +01004393 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004394 bs = bs->file;
4395 }
4396
4397 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4398 return bs->drv->bdrv_debug_resume(bs, tag);
4399 }
4400
4401 return -ENOTSUP;
4402}
4403
4404bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4405{
4406 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4407 bs = bs->file;
4408 }
4409
4410 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4411 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4412 }
4413
4414 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004415}
4416
Blue Swirl199630b2010-07-25 20:49:34 +00004417int bdrv_is_snapshot(BlockDriverState *bs)
4418{
4419 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4420}
4421
Jeff Codyb1b1d782012-10-16 15:49:09 -04004422/* backing_file can be relative, absolute, or a protocol. If it is
4423 * relative, it must be relative to the chain. So, passing in bs->filename
4424 * from a BDS as backing_file should not be done, as that may be relative to
4425 * the CWD rather than the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004426BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4427 const char *backing_file)
4428{
Jeff Codyb1b1d782012-10-16 15:49:09 -04004429 char *filename_full = NULL;
4430 char *backing_file_full = NULL;
4431 char *filename_tmp = NULL;
4432 int is_protocol = 0;
4433 BlockDriverState *curr_bs = NULL;
4434 BlockDriverState *retval = NULL;
4435
4436 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004437 return NULL;
4438 }
4439
Jeff Codyb1b1d782012-10-16 15:49:09 -04004440 filename_full = g_malloc(PATH_MAX);
4441 backing_file_full = g_malloc(PATH_MAX);
4442 filename_tmp = g_malloc(PATH_MAX);
4443
4444 is_protocol = path_has_protocol(backing_file);
4445
4446 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4447
4448 /* If either of the filename paths is actually a protocol, then
4449 * compare unmodified paths; otherwise make paths relative */
4450 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4451 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4452 retval = curr_bs->backing_hd;
4453 break;
4454 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004455 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04004456 /* If not an absolute filename path, make it relative to the current
4457 * image's filename path */
4458 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4459 backing_file);
4460
4461 /* We are going to compare absolute pathnames */
4462 if (!realpath(filename_tmp, filename_full)) {
4463 continue;
4464 }
4465
4466 /* We need to make sure the backing filename we are comparing against
4467 * is relative to the current image filename (or absolute) */
4468 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4469 curr_bs->backing_file);
4470
4471 if (!realpath(filename_tmp, backing_file_full)) {
4472 continue;
4473 }
4474
4475 if (strcmp(backing_file_full, filename_full) == 0) {
4476 retval = curr_bs->backing_hd;
4477 break;
4478 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004479 }
4480 }
4481
Jeff Codyb1b1d782012-10-16 15:49:09 -04004482 g_free(filename_full);
4483 g_free(backing_file_full);
4484 g_free(filename_tmp);
4485 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004486}
4487
Benoît Canetf198fd12012-08-02 10:22:47 +02004488int bdrv_get_backing_file_depth(BlockDriverState *bs)
4489{
4490 if (!bs->drv) {
4491 return 0;
4492 }
4493
4494 if (!bs->backing_hd) {
4495 return 0;
4496 }
4497
4498 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4499}
4500
bellard83f64092006-08-01 16:21:11 +00004501/**************************************************************/
4502/* async I/Os */
4503
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004504BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4505 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004506 BlockCompletionFunc *cb, void *opaque)
aliguori3b69e4b2009-01-22 16:59:24 +00004507{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004508 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4509
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004510 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004511 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004512}
4513
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004514BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4515 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004516 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004517{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004518 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4519
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004520 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004521 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004522}
4523
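/*
 * Editor's illustrative sketch (not part of the original code, kept out of
 * the build with #if 0): submitting one asynchronous read. The QEMUIOVector
 * must stay valid until the completion callback runs, hence the
 * heap-allocated request wrapper. The Example*/example_* names are
 * hypothetical.
 */
#if 0
typedef struct ExampleAIOReq {
    QEMUIOVector qiov;
    struct iovec iov;
} ExampleAIOReq;

static void example_read_done(void *opaque, int ret)
{
    ExampleAIOReq *req = opaque;

    /* ret is 0 on success or a negative errno */
    g_free(req);
}

static void example_submit_read(BlockDriverState *bs, int64_t sector_num,
                                uint8_t *buf, int nb_sectors)
{
    ExampleAIOReq *req = g_new0(ExampleAIOReq, 1);

    req->iov.iov_base = buf;
    req->iov.iov_len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&req->qiov, &req->iov, 1);

    /* completion runs later, from the AioContext of 'bs' */
    bdrv_aio_readv(bs, sector_num, &req->qiov, nb_sectors,
                   example_read_done, req);
}
#endif
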
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004524BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004525 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004526 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004527{
4528 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4529
4530 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4531 BDRV_REQ_ZERO_WRITE | flags,
4532 cb, opaque, true);
4533}
4534
Kevin Wolf40b4f532009-09-09 17:53:37 +02004535
4536typedef struct MultiwriteCB {
4537 int error;
4538 int num_requests;
4539 int num_callbacks;
4540 struct {
Markus Armbruster097310b2014-10-07 13:59:15 +02004541 BlockCompletionFunc *cb;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004542 void *opaque;
4543 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004544 } callbacks[];
4545} MultiwriteCB;
4546
4547static void multiwrite_user_cb(MultiwriteCB *mcb)
4548{
4549 int i;
4550
4551 for (i = 0; i < mcb->num_callbacks; i++) {
4552 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004553 if (mcb->callbacks[i].free_qiov) {
4554 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4555 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004556 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004557 }
4558}
4559
4560static void multiwrite_cb(void *opaque, int ret)
4561{
4562 MultiwriteCB *mcb = opaque;
4563
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004564 trace_multiwrite_cb(mcb, ret);
4565
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004566 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004567 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004568 }
4569
4570 mcb->num_requests--;
4571 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004572 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004573 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004574 }
4575}
4576
4577static int multiwrite_req_compare(const void *a, const void *b)
4578{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004579 const BlockRequest *req1 = a, *req2 = b;
4580
4581 /*
4582 * Note that we can't simply subtract req2->sector from req1->sector
4583 * here as that could overflow the return value.
4584 */
4585 if (req1->sector > req2->sector) {
4586 return 1;
4587 } else if (req1->sector < req2->sector) {
4588 return -1;
4589 } else {
4590 return 0;
4591 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004592}
4593
4594/*
4595 * Takes a bunch of requests and tries to merge them. Returns the number of
4596 * requests that remain after merging.
4597 */
4598static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4599 int num_reqs, MultiwriteCB *mcb)
4600{
4601 int i, outidx;
4602
4603 // Sort requests by start sector
4604 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4605
4606 // Check if adjacent requests touch the same clusters. If so, combine them,
4607 // filling up gaps with zero sectors.
4608 outidx = 0;
4609 for (i = 1; i < num_reqs; i++) {
4610 int merge = 0;
4611 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4612
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004613 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004614 if (reqs[i].sector <= oldreq_last) {
4615 merge = 1;
4616 }
4617
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004618 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4619 merge = 0;
4620 }
4621
Peter Lieven6c5a42a2014-10-27 10:18:46 +01004622 if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
4623 reqs[i].nb_sectors > bs->bl.max_transfer_length) {
4624 merge = 0;
4625 }
4626
Kevin Wolf40b4f532009-09-09 17:53:37 +02004627 if (merge) {
4628 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004629 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004630 qemu_iovec_init(qiov,
4631 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4632
4633 // Add the first request to the merged one. If the requests are
4634 // overlapping, drop the last sectors of the first request.
4635 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004636 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004637
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004638            // We shouldn't need to add any zeros between the two requests
4639 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004640
4641 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004642 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004643
Stefan Hajnoczi391827e2014-07-30 09:53:30 +01004644 // Add tail of first request, if necessary
4645 if (qiov->size < reqs[outidx].qiov->size) {
4646 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4647 reqs[outidx].qiov->size - qiov->size);
4648 }
4649
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004650 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004651 reqs[outidx].qiov = qiov;
4652
4653 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4654 } else {
4655 outidx++;
4656 reqs[outidx].sector = reqs[i].sector;
4657 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4658 reqs[outidx].qiov = reqs[i].qiov;
4659 }
4660 }
4661
Peter Lievenf4564d52015-02-02 14:52:18 +01004662 block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
4663
Kevin Wolf40b4f532009-09-09 17:53:37 +02004664 return outidx + 1;
4665}
4666
4667/*
4668 * Submit multiple AIO write requests at once.
4669 *
4670 * On success, the function returns 0 and all requests in the reqs array have
4671 * been submitted. In error case this function returns -1, and any of the
4672 * requests may or may not be submitted yet. In particular, this means that the
4673 * callback will be called for some of the requests, for others it won't. The
4674 * caller must check the error field of the BlockRequest to wait for the right
4675 * callbacks (if error != 0, no callback will be called).
4676 *
4677 * The implementation may modify the contents of the reqs array, e.g. to merge
4678 * requests. However, the fields opaque and error are left unmodified as they
4679 * are used to signal failure for a single request to the caller.
4680 */
4681int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4682{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004683 MultiwriteCB *mcb;
4684 int i;
4685
Ryan Harper301db7c2011-03-07 10:01:04 -06004686 /* don't submit writes if we don't have a medium */
4687 if (bs->drv == NULL) {
4688 for (i = 0; i < num_reqs; i++) {
4689 reqs[i].error = -ENOMEDIUM;
4690 }
4691 return -1;
4692 }
4693
Kevin Wolf40b4f532009-09-09 17:53:37 +02004694 if (num_reqs == 0) {
4695 return 0;
4696 }
4697
4698 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004699 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004700 mcb->num_requests = 0;
4701 mcb->num_callbacks = num_reqs;
4702
4703 for (i = 0; i < num_reqs; i++) {
4704 mcb->callbacks[i].cb = reqs[i].cb;
4705 mcb->callbacks[i].opaque = reqs[i].opaque;
4706 }
4707
4708    // Check for mergeable requests
4709 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4710
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004711 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4712
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004713 /* Run the aio requests. */
4714 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004715 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004716 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4717 reqs[i].nb_sectors, reqs[i].flags,
4718 multiwrite_cb, mcb,
4719 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004720 }
4721
4722 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004723}
4724
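/*
 * Editor's illustrative sketch (not part of the original code, kept out of
 * the build with #if 0): batching two writes in a single
 * bdrv_aio_multiwrite() call. Adjacent requests may be merged internally,
 * but every request still gets its own completion callback. The example_*
 * names and the sector numbers are hypothetical.
 */
#if 0
static void example_multiwrite_cb(void *opaque, int ret)
{
    /* called once per original request, with ret < 0 on failure */
}

static int example_submit_pair(BlockDriverState *bs,
                               QEMUIOVector *qiov_a, QEMUIOVector *qiov_b)
{
    BlockRequest reqs[2] = {
        {
            .sector     = 0,
            .nb_sectors = qiov_a->size >> BDRV_SECTOR_BITS,
            .qiov       = qiov_a,
            .cb         = example_multiwrite_cb,
        },
        {
            .sector     = 128,
            .nb_sectors = qiov_b->size >> BDRV_SECTOR_BITS,
            .qiov       = qiov_b,
            .cb         = example_multiwrite_cb,
        },
    };

    return bdrv_aio_multiwrite(bs, reqs, 2);
}
#endif
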
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004725void bdrv_aio_cancel(BlockAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004726{
Fam Zhengca5fd112014-09-11 13:41:27 +08004727 qemu_aio_ref(acb);
4728 bdrv_aio_cancel_async(acb);
4729 while (acb->refcnt > 1) {
4730 if (acb->aiocb_info->get_aio_context) {
4731 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
4732 } else if (acb->bs) {
4733 aio_poll(bdrv_get_aio_context(acb->bs), true);
4734 } else {
4735 abort();
Fam Zheng02c50ef2014-09-11 13:41:09 +08004736 }
Fam Zheng02c50ef2014-09-11 13:41:09 +08004737 }
Fam Zheng80074292014-09-11 13:41:28 +08004738 qemu_aio_unref(acb);
Fam Zheng02c50ef2014-09-11 13:41:09 +08004739}
4740
4741/* Async version of aio cancel. The caller is not blocked if the acb implements
4742 * cancel_async; otherwise we do nothing and let the request complete normally.
4743 * In either case the completion callback must be called. */
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004744void bdrv_aio_cancel_async(BlockAIOCB *acb)
Fam Zheng02c50ef2014-09-11 13:41:09 +08004745{
4746 if (acb->aiocb_info->cancel_async) {
4747 acb->aiocb_info->cancel_async(acb);
4748 }
bellard83f64092006-08-01 16:21:11 +00004749}
4750
4751/**************************************************************/
4752/* async block device emulation */
4753
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004754typedef struct BlockAIOCBSync {
4755 BlockAIOCB common;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004756 QEMUBH *bh;
4757 int ret;
4758 /* vector translation state */
4759 QEMUIOVector *qiov;
4760 uint8_t *bounce;
4761 int is_write;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004762} BlockAIOCBSync;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004763
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004764static const AIOCBInfo bdrv_em_aiocb_info = {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004765 .aiocb_size = sizeof(BlockAIOCBSync),
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004766};
4767
bellard83f64092006-08-01 16:21:11 +00004768static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004769{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004770 BlockAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004771
Kevin Wolf857d4f42014-05-20 13:16:51 +02004772 if (!acb->is_write && acb->ret >= 0) {
Michael Tokarev03396142012-06-07 20:17:55 +04004773 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
Kevin Wolf857d4f42014-05-20 13:16:51 +02004774 }
aliguoriceb42de2009-04-07 18:43:28 +00004775 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004776 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004777 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004778 acb->bh = NULL;
Fam Zheng80074292014-09-11 13:41:28 +08004779 qemu_aio_unref(acb);
bellardbeac80c2006-06-26 20:08:57 +00004780}
bellardbeac80c2006-06-26 20:08:57 +00004781
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004782static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4783 int64_t sector_num,
4784 QEMUIOVector *qiov,
4785 int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004786 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004787 void *opaque,
4788 int is_write)
bellardea2384d2004-08-01 21:59:26 +00004790{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004791 BlockAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004792
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004793 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004794 acb->is_write = is_write;
4795 acb->qiov = qiov;
Kevin Wolf857d4f42014-05-20 13:16:51 +02004796 acb->bounce = qemu_try_blockalign(bs, qiov->size);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004797 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004798
Kevin Wolf857d4f42014-05-20 13:16:51 +02004799 if (acb->bounce == NULL) {
4800 acb->ret = -ENOMEM;
4801 } else if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004802 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004803 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004804 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004805 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004806 }
4807
pbrookce1a14d2006-08-07 02:38:06 +00004808 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004809
pbrookce1a14d2006-08-07 02:38:06 +00004810 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004811}
4812
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004813static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004814 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004815 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004816{
aliguorif141eaf2009-04-07 18:43:24 +00004817 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004818}
4819
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004820static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004821 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004822 BlockCompletionFunc *cb, void *opaque)
aliguorif141eaf2009-04-07 18:43:24 +00004823{
4824 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4825}
4826
Kevin Wolf68485422011-06-30 10:05:46 +02004827
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004828typedef struct BlockAIOCBCoroutine {
4829 BlockAIOCB common;
Kevin Wolf68485422011-06-30 10:05:46 +02004830 BlockRequest req;
4831 bool is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004832 bool *done;
Kevin Wolf68485422011-06-30 10:05:46 +02004833 QEMUBH* bh;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004834} BlockAIOCBCoroutine;
Kevin Wolf68485422011-06-30 10:05:46 +02004835
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004836static const AIOCBInfo bdrv_em_co_aiocb_info = {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004837 .aiocb_size = sizeof(BlockAIOCBCoroutine),
Kevin Wolf68485422011-06-30 10:05:46 +02004838};
4839
Paolo Bonzini35246a62011-10-14 10:41:29 +02004840static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004841{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004842 BlockAIOCBCoroutine *acb = opaque;
Kevin Wolf68485422011-06-30 10:05:46 +02004843
4844 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004845
Kevin Wolf68485422011-06-30 10:05:46 +02004846 qemu_bh_delete(acb->bh);
Fam Zheng80074292014-09-11 13:41:28 +08004847 qemu_aio_unref(acb);
Kevin Wolf68485422011-06-30 10:05:46 +02004848}
4849
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004850/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4851static void coroutine_fn bdrv_co_do_rw(void *opaque)
4852{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004853 BlockAIOCBCoroutine *acb = opaque;
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004854 BlockDriverState *bs = acb->common.bs;
4855
4856 if (!acb->is_write) {
4857 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004858 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004859 } else {
4860 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004861 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004862 }
4863
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004864 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004865 qemu_bh_schedule(acb->bh);
4866}
4867
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004868static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4869 int64_t sector_num,
4870 QEMUIOVector *qiov,
4871 int nb_sectors,
4872 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004873 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004874 void *opaque,
4875 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004876{
4877 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004878 BlockAIOCBCoroutine *acb;
Kevin Wolf68485422011-06-30 10:05:46 +02004879
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004880 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004881 acb->req.sector = sector_num;
4882 acb->req.nb_sectors = nb_sectors;
4883 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004884 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004885 acb->is_write = is_write;
4886
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004887 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004888 qemu_coroutine_enter(co, acb);
4889
4890 return &acb->common;
4891}
4892
Paolo Bonzini07f07612011-10-17 12:32:12 +02004893static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004894{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004895 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004896 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004897
Paolo Bonzini07f07612011-10-17 12:32:12 +02004898 acb->req.error = bdrv_co_flush(bs);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004899 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004900 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004901}
4902
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004903BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004904 BlockCompletionFunc *cb, void *opaque)
Alexander Graf016f5cf2010-05-26 17:51:49 +02004905{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004906 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004907
Paolo Bonzini07f07612011-10-17 12:32:12 +02004908 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004909 BlockAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004910
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004911 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004912
Paolo Bonzini07f07612011-10-17 12:32:12 +02004913 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4914 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004915
Alexander Graf016f5cf2010-05-26 17:51:49 +02004916 return &acb->common;
4917}
4918
Paolo Bonzini4265d622011-10-17 12:32:14 +02004919static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4920{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004921 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004922 BlockDriverState *bs = acb->common.bs;
4923
4924 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004925 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004926 qemu_bh_schedule(acb->bh);
4927}
4928
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004929BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
Paolo Bonzini4265d622011-10-17 12:32:14 +02004930 int64_t sector_num, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004931 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzini4265d622011-10-17 12:32:14 +02004932{
4933 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004934 BlockAIOCBCoroutine *acb;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004935
4936 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4937
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004938 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004939 acb->req.sector = sector_num;
4940 acb->req.nb_sectors = nb_sectors;
4941 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4942 qemu_coroutine_enter(co, acb);
4943
4944 return &acb->common;
4945}
4946
bellardea2384d2004-08-01 21:59:26 +00004947void bdrv_init(void)
4948{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004949 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004950}
pbrookce1a14d2006-08-07 02:38:06 +00004951
Markus Armbrustereb852012009-10-27 18:41:44 +01004952void bdrv_init_with_whitelist(void)
4953{
4954 use_bdrv_whitelist = 1;
4955 bdrv_init();
4956}
4957
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004958void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004959 BlockCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004960{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004961 BlockAIOCB *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004962
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004963 acb = g_slice_alloc(aiocb_info->aiocb_size);
4964 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004965 acb->bs = bs;
4966 acb->cb = cb;
4967 acb->opaque = opaque;
Fam Zhengf197fe22014-09-11 13:41:08 +08004968 acb->refcnt = 1;
pbrookce1a14d2006-08-07 02:38:06 +00004969 return acb;
4970}
4971
Fam Zhengf197fe22014-09-11 13:41:08 +08004972void qemu_aio_ref(void *p)
4973{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004974 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004975 acb->refcnt++;
4976}
4977
Fam Zheng80074292014-09-11 13:41:28 +08004978void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00004979{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004980 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004981 assert(acb->refcnt > 0);
4982 if (--acb->refcnt == 0) {
4983 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4984 }
pbrookce1a14d2006-08-07 02:38:06 +00004985}
bellard19cb3732006-08-19 11:45:59 +00004986
4987/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004988/* Coroutine block device emulation */
4989
4990typedef struct CoroutineIOCompletion {
4991 Coroutine *coroutine;
4992 int ret;
4993} CoroutineIOCompletion;
4994
4995static void bdrv_co_io_em_complete(void *opaque, int ret)
4996{
4997 CoroutineIOCompletion *co = opaque;
4998
4999 co->ret = ret;
5000 qemu_coroutine_enter(co->coroutine, NULL);
5001}
5002
5003static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
5004 int nb_sectors, QEMUIOVector *iov,
5005 bool is_write)
5006{
5007 CoroutineIOCompletion co = {
5008 .coroutine = qemu_coroutine_self(),
5009 };
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005010 BlockAIOCB *acb;
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005011
5012 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01005013 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
5014 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005015 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01005016 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
5017 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005018 }
5019
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01005020 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005021 if (!acb) {
5022 return -EIO;
5023 }
5024 qemu_coroutine_yield();
5025
5026 return co.ret;
5027}
5028
5029static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
5030 int64_t sector_num, int nb_sectors,
5031 QEMUIOVector *iov)
5032{
5033 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
5034}
5035
5036static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
5037 int64_t sector_num, int nb_sectors,
5038 QEMUIOVector *iov)
5039{
5040 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
5041}
5042
Paolo Bonzini07f07612011-10-17 12:32:12 +02005043static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005044{
Paolo Bonzini07f07612011-10-17 12:32:12 +02005045 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005046
Paolo Bonzini07f07612011-10-17 12:32:12 +02005047 rwco->ret = bdrv_co_flush(rwco->bs);
5048}
5049
5050int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
5051{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005052 int ret;
5053
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005054 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02005055 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005056 }
5057
Kevin Wolfca716362011-11-10 18:13:59 +01005058 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfbf736fe2013-06-05 15:17:55 +02005059 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005060 if (bs->drv->bdrv_co_flush_to_os) {
5061 ret = bs->drv->bdrv_co_flush_to_os(bs);
5062 if (ret < 0) {
5063 return ret;
5064 }
5065 }
5066
Kevin Wolfca716362011-11-10 18:13:59 +01005067 /* But don't actually force it to the disk with cache=unsafe */
5068 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02005069 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01005070 }
5071
Kevin Wolfbf736fe2013-06-05 15:17:55 +02005072 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005073 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005074 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005075 } else if (bs->drv->bdrv_aio_flush) {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005076 BlockAIOCB *acb;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005077 CoroutineIOCompletion co = {
5078 .coroutine = qemu_coroutine_self(),
5079 };
5080
5081 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
5082 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005083 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005084 } else {
5085 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005086 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005087 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02005088 } else {
5089 /*
5090 * Some block drivers always operate in either writethrough or unsafe
5091     * mode and therefore don't support bdrv_flush. Usually qemu doesn't
5092 * know how the server works (because the behaviour is hardcoded or
5093 * depends on server-side configuration), so we can't ensure that
5094 * everything is safe on disk. Returning an error doesn't work because
5095 * that would break guests even if the server operates in writethrough
5096 * mode.
5097 *
5098 * Let's hope the user knows what he's doing.
5099 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005100 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005101 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005102 if (ret < 0) {
5103 return ret;
5104 }
5105
5106 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
5107 * in the case of cache=unsafe, so there are no useless flushes.
5108 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02005109flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005110 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005111}
5112
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005113void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005114{
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005115 Error *local_err = NULL;
5116 int ret;
5117
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005118 if (!bs->drv) {
5119 return;
Anthony Liguori0f154232011-11-14 15:09:45 -06005120 }
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005121
Alexey Kardashevskiy7ea2d262014-10-09 13:50:46 +11005122 if (!(bs->open_flags & BDRV_O_INCOMING)) {
5123 return;
5124 }
5125 bs->open_flags &= ~BDRV_O_INCOMING;
5126
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005127 if (bs->drv->bdrv_invalidate_cache) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005128 bs->drv->bdrv_invalidate_cache(bs, &local_err);
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005129 } else if (bs->file) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005130 bdrv_invalidate_cache(bs->file, &local_err);
5131 }
5132 if (local_err) {
5133 error_propagate(errp, local_err);
5134 return;
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005135 }
5136
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005137 ret = refresh_total_sectors(bs, bs->total_sectors);
5138 if (ret < 0) {
5139 error_setg_errno(errp, -ret, "Could not refresh total sector count");
5140 return;
5141 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005142}
5143
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005144void bdrv_invalidate_cache_all(Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005145{
5146 BlockDriverState *bs;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005147 Error *local_err = NULL;
Anthony Liguori0f154232011-11-14 15:09:45 -06005148
Benoît Canetdc364f42014-01-23 21:31:32 +01005149 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005150 AioContext *aio_context = bdrv_get_aio_context(bs);
5151
5152 aio_context_acquire(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005153 bdrv_invalidate_cache(bs, &local_err);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005154 aio_context_release(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005155 if (local_err) {
5156 error_propagate(errp, local_err);
5157 return;
5158 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005159 }
5160}
5161
Paolo Bonzini07f07612011-10-17 12:32:12 +02005162int bdrv_flush(BlockDriverState *bs)
5163{
5164 Coroutine *co;
5165 RwCo rwco = {
5166 .bs = bs,
5167 .ret = NOT_DONE,
5168 };
5169
5170 if (qemu_in_coroutine()) {
5171 /* Fast-path if already in coroutine context */
5172 bdrv_flush_co_entry(&rwco);
5173 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005174 AioContext *aio_context = bdrv_get_aio_context(bs);
5175
Paolo Bonzini07f07612011-10-17 12:32:12 +02005176 co = qemu_coroutine_create(bdrv_flush_co_entry);
5177 qemu_coroutine_enter(co, &rwco);
5178 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005179 aio_poll(aio_context, true);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005180 }
5181 }
5182
5183 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005184}
5185
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005186typedef struct DiscardCo {
5187 BlockDriverState *bs;
5188 int64_t sector_num;
5189 int nb_sectors;
5190 int ret;
5191} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005192static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5193{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005194 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005195
5196 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5197}
5198
5199int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5200 int nb_sectors)
5201{
Max Reitzb9c64942015-02-05 13:58:25 -05005202 int max_discard, ret;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005203
Paolo Bonzini4265d622011-10-17 12:32:14 +02005204 if (!bs->drv) {
5205 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05005206 }
5207
5208 ret = bdrv_check_request(bs, sector_num, nb_sectors);
5209 if (ret < 0) {
5210 return ret;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005211 } else if (bs->read_only) {
5212 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005213 }
5214
Fam Zhenge4654d22013-11-13 18:29:43 +08005215 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005216
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005217 /* Do nothing if disabled. */
5218 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5219 return 0;
5220 }
5221
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005222 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005223 return 0;
5224 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005225
Peter Lieven75af1f32015-02-06 11:54:11 +01005226 max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005227 while (nb_sectors > 0) {
5228 int ret;
5229 int num = nb_sectors;
5230
5231 /* align request */
5232 if (bs->bl.discard_alignment &&
5233 num >= bs->bl.discard_alignment &&
5234 sector_num % bs->bl.discard_alignment) {
5235 if (num > bs->bl.discard_alignment) {
5236 num = bs->bl.discard_alignment;
5237 }
5238 num -= sector_num % bs->bl.discard_alignment;
5239 }
5240
5241 /* limit request size */
5242 if (num > max_discard) {
5243 num = max_discard;
5244 }
5245
5246 if (bs->drv->bdrv_co_discard) {
5247 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5248 } else {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005249 BlockAIOCB *acb;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005250 CoroutineIOCompletion co = {
5251 .coroutine = qemu_coroutine_self(),
5252 };
5253
5254            acb = bs->drv->bdrv_aio_discard(bs, sector_num, num,
5255 bdrv_co_io_em_complete, &co);
5256 if (acb == NULL) {
5257 return -EIO;
5258 } else {
5259 qemu_coroutine_yield();
5260 ret = co.ret;
5261 }
5262 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005263 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005264 return ret;
5265 }
5266
5267 sector_num += num;
5268 nb_sectors -= num;
5269 }
5270 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005271}
5272
5273int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5274{
5275 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005276 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005277 .bs = bs,
5278 .sector_num = sector_num,
5279 .nb_sectors = nb_sectors,
5280 .ret = NOT_DONE,
5281 };
5282
5283 if (qemu_in_coroutine()) {
5284 /* Fast-path if already in coroutine context */
5285 bdrv_discard_co_entry(&rwco);
5286 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005287 AioContext *aio_context = bdrv_get_aio_context(bs);
5288
Paolo Bonzini4265d622011-10-17 12:32:14 +02005289 co = qemu_coroutine_create(bdrv_discard_co_entry);
5290 qemu_coroutine_enter(co, &rwco);
5291 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005292 aio_poll(aio_context, true);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005293 }
5294 }
5295
5296 return rwco.ret;
5297}
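/* Illustrative sketch, not an actual block.c function: discarding a byte range
 * through the synchronous bdrv_discard() wrapper above.  The helper name and
 * its byte-based interface are hypothetical; it only shows that discard works
 * on whole sectors and silently does nothing without BDRV_O_UNMAP. */
static int example_discard_bytes(BlockDriverState *bs,
                                 int64_t offset, int64_t bytes)
{
    /* keep only the sector-aligned middle of the range */
    int64_t first = DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE);
    int64_t last = (offset + bytes) >> BDRV_SECTOR_BITS;

    if (last <= first) {
        return 0;
    }
    /* assumes the range fits in an int number of sectors */
    return bdrv_discard(bs, first, (int)(last - first));
}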
5298
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005299/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005300/* removable device support */
5301
5302/**
5303 * Return TRUE if the media is present
5304 */
5305int bdrv_is_inserted(BlockDriverState *bs)
5306{
5307 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005308
bellard19cb3732006-08-19 11:45:59 +00005309 if (!drv)
5310 return 0;
5311 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005312 return 1;
5313 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005314}
5315
5316/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005317 * Return whether the media changed since the last call to this
5318 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005319 */
5320int bdrv_media_changed(BlockDriverState *bs)
5321{
5322 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005323
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005324 if (drv && drv->bdrv_media_changed) {
5325 return drv->bdrv_media_changed(bs);
5326 }
5327 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005328}
5329
5330/**
5331 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5332 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005333void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005334{
5335 BlockDriver *drv = bs->drv;
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005336 const char *device_name;
bellard19cb3732006-08-19 11:45:59 +00005337
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005338 if (drv && drv->bdrv_eject) {
5339 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005340 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005341
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005342 device_name = bdrv_get_device_name(bs);
5343 if (device_name[0] != '\0') {
5344 qapi_event_send_device_tray_moved(device_name,
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02005345 eject_flag, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005346 }
bellard19cb3732006-08-19 11:45:59 +00005347}
5348
bellard19cb3732006-08-19 11:45:59 +00005349/**
5350 * Lock or unlock the media (if it is locked, the user won't be able
5351 * to eject it manually).
5352 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005353void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005354{
5355 BlockDriver *drv = bs->drv;
5356
Markus Armbruster025e8492011-09-06 18:58:47 +02005357 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005358
Markus Armbruster025e8492011-09-06 18:58:47 +02005359 if (drv && drv->bdrv_lock_medium) {
5360 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005361 }
5362}
ths985a03b2007-12-24 16:10:43 +00005363
5364/* needed for generic scsi interface */
5365
5366int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5367{
5368 BlockDriver *drv = bs->drv;
5369
5370 if (drv && drv->bdrv_ioctl)
5371 return drv->bdrv_ioctl(bs, req, buf);
5372 return -ENOTSUP;
5373}
aliguori7d780662009-03-12 19:57:08 +00005374
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005375BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
aliguori221f7152009-03-28 17:28:41 +00005376 unsigned long int req, void *buf,
Markus Armbruster097310b2014-10-07 13:59:15 +02005377 BlockCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005378{
aliguori221f7152009-03-28 17:28:41 +00005379 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005380
aliguori221f7152009-03-28 17:28:41 +00005381 if (drv && drv->bdrv_aio_ioctl)
5382 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5383 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005384}
aliguorie268ca52009-04-22 20:20:00 +00005385
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005386void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005387{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005388 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005389}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005390
aliguorie268ca52009-04-22 20:20:00 +00005391void *qemu_blockalign(BlockDriverState *bs, size_t size)
5392{
Kevin Wolf339064d2013-11-28 10:23:32 +01005393 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005394}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005395
Max Reitz9ebd8442014-10-22 14:09:27 +02005396void *qemu_blockalign0(BlockDriverState *bs, size_t size)
5397{
5398 return memset(qemu_blockalign(bs, size), 0, size);
5399}
5400
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005401void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5402{
5403 size_t align = bdrv_opt_mem_align(bs);
5404
5405 /* Ensure that NULL is never returned on success */
5406 assert(align > 0);
5407 if (size == 0) {
5408 size = align;
5409 }
5410
5411 return qemu_try_memalign(align, size);
5412}
5413
Max Reitz9ebd8442014-10-22 14:09:27 +02005414void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
5415{
5416 void *mem = qemu_try_blockalign(bs, size);
5417
5418 if (mem) {
5419 memset(mem, 0, size);
5420 }
5421
5422 return mem;
5423}
5424
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005425/*
5426 * Check if all memory in this vector is aligned to the required memory alignment.
5427 */
5428bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5429{
5430 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005431 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005432
5433 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005434 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005435 return false;
5436 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005437 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005438 return false;
5439 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005440 }
5441
5442 return true;
5443}
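/* Illustrative sketch, not an actual block.c function: allocating an I/O
 * buffer that honours the memory alignment reported by bdrv_opt_mem_align()
 * and checking the resulting vector with bdrv_qiov_is_aligned().  The helper
 * name is hypothetical. */
static bool example_alloc_aligned_qiov(BlockDriverState *bs, size_t len,
                                       QEMUIOVector *qiov, void **buf)
{
    *buf = qemu_try_blockalign(bs, len);
    if (*buf == NULL) {
        return false;
    }

    qemu_iovec_init(qiov, 1);
    qemu_iovec_add(qiov, *buf, len);

    /* the base address is aligned by construction; this only fails when
     * "len" itself is not a multiple of the required alignment, in which
     * case the request would have to be bounced */
    return bdrv_qiov_is_aligned(bs, qiov);
}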
5444
Fam Zhengb8afb522014-04-16 09:34:30 +08005445BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5446 Error **errp)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005447{
5448 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005449 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005450
Paolo Bonzini50717e92013-01-21 17:09:45 +01005451 assert((granularity & (granularity - 1)) == 0);
5452
Fam Zhenge4654d22013-11-13 18:29:43 +08005453 granularity >>= BDRV_SECTOR_BITS;
5454 assert(granularity);
Markus Armbruster57322b72014-06-26 13:23:22 +02005455 bitmap_size = bdrv_nb_sectors(bs);
Fam Zhengb8afb522014-04-16 09:34:30 +08005456 if (bitmap_size < 0) {
5457 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5458 errno = -bitmap_size;
5459 return NULL;
5460 }
Markus Armbruster5839e532014-08-19 10:31:08 +02005461 bitmap = g_new0(BdrvDirtyBitmap, 1);
Stefan Hajnoczi786a4ea2015-03-23 15:29:26 +00005462 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(granularity));
Fam Zhenge4654d22013-11-13 18:29:43 +08005463 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5464 return bitmap;
5465}
5466
5467void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5468{
5469 BdrvDirtyBitmap *bm, *next;
5470 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5471 if (bm == bitmap) {
5472 QLIST_REMOVE(bitmap, list);
5473 hbitmap_free(bitmap->bitmap);
5474 g_free(bitmap);
5475 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005476 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005477 }
5478}
5479
Fam Zheng21b56832013-11-13 18:29:44 +08005480BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5481{
5482 BdrvDirtyBitmap *bm;
5483 BlockDirtyInfoList *list = NULL;
5484 BlockDirtyInfoList **plist = &list;
5485
5486 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
Markus Armbruster5839e532014-08-19 10:31:08 +02005487 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5488 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
Fam Zheng21b56832013-11-13 18:29:44 +08005489 info->count = bdrv_get_dirty_count(bs, bm);
5490 info->granularity =
5491 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5492 entry->value = info;
5493 *plist = entry;
5494 plist = &entry->next;
5495 }
5496
5497 return list;
5498}
5499
Fam Zhenge4654d22013-11-13 18:29:43 +08005500int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005501{
Fam Zhenge4654d22013-11-13 18:29:43 +08005502 if (bitmap) {
5503 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005504 } else {
5505 return 0;
5506 }
5507}
5508
Fam Zhenge4654d22013-11-13 18:29:43 +08005509void bdrv_dirty_iter_init(BlockDriverState *bs,
5510 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005511{
Fam Zhenge4654d22013-11-13 18:29:43 +08005512 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005513}
5514
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005515void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5516 int64_t cur_sector, int nr_sectors)
5517{
5518 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5519}
5520
5521void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5522 int64_t cur_sector, int nr_sectors)
5523{
5524 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5525}
5526
5527static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5528 int nr_sectors)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005529{
Fam Zhenge4654d22013-11-13 18:29:43 +08005530 BdrvDirtyBitmap *bitmap;
5531 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5532 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005533 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005534}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005535
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005536static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
5537 int nr_sectors)
Fam Zhenge4654d22013-11-13 18:29:43 +08005538{
5539 BdrvDirtyBitmap *bitmap;
5540 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5541 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5542 }
5543}
5544
5545int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5546{
5547 return hbitmap_count(bitmap->bitmap);
5548}
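/* Illustrative sketch, not an actual block.c function: the typical life cycle
 * of a dirty bitmap as a hypothetical incremental-copy job might use it.  The
 * 64 KiB granularity and the helper name are example values only. */
static void example_copy_dirty_sectors(BlockDriverState *bs, Error **errp)
{
    BdrvDirtyBitmap *bitmap;
    HBitmapIter hbi;
    int64_t sector;

    bitmap = bdrv_create_dirty_bitmap(bs, 65536, errp);
    if (!bitmap) {
        return;
    }

    /* ... guest writes now mark sectors dirty via the write path ... */

    bdrv_dirty_iter_init(bs, bitmap, &hbi);
    while ((sector = hbitmap_iter_next(&hbi)) >= 0) {
        /* copy the data around "sector", then clear it from the bitmap */
        bdrv_reset_dirty_bitmap(bs, bitmap, sector, 1);
    }

    bdrv_release_dirty_bitmap(bs, bitmap);
}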
5549
Fam Zheng9fcb0252013-08-23 09:14:46 +08005550/* Get a reference to bs */
5551void bdrv_ref(BlockDriverState *bs)
5552{
5553 bs->refcnt++;
5554}
5555
5556/* Release a previously grabbed reference to bs.
5557 * If, after releasing, the reference count drops to zero, the BlockDriverState
5558 * is deleted. */
5559void bdrv_unref(BlockDriverState *bs)
5560{
Jeff Cody9a4d5ca2014-07-23 17:22:57 -04005561 if (!bs) {
5562 return;
5563 }
Fam Zheng9fcb0252013-08-23 09:14:46 +08005564 assert(bs->refcnt > 0);
5565 if (--bs->refcnt == 0) {
5566 bdrv_delete(bs);
5567 }
5568}
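/* Illustrative sketch, not an actual block.c function: taking a reference
 * across an operation that can run nested event loops, so that a concurrent
 * bdrv_unref() elsewhere cannot delete the BlockDriverState underneath us.
 * The helper name is hypothetical. */
static int example_flush_with_ref(BlockDriverState *bs)
{
    int ret;

    bdrv_ref(bs);
    ret = bdrv_flush(bs);   /* may poll the AioContext */
    bdrv_unref(bs);         /* may delete bs if we held the last reference */
    return ret;
}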
5569
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005570struct BdrvOpBlocker {
5571 Error *reason;
5572 QLIST_ENTRY(BdrvOpBlocker) list;
5573};
5574
5575bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5576{
5577 BdrvOpBlocker *blocker;
5578 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5579 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5580 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5581 if (errp) {
5582 error_setg(errp, "Device '%s' is busy: %s",
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005583 bdrv_get_device_name(bs),
5584 error_get_pretty(blocker->reason));
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005585 }
5586 return true;
5587 }
5588 return false;
5589}
5590
5591void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5592{
5593 BdrvOpBlocker *blocker;
5594 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5595
Markus Armbruster5839e532014-08-19 10:31:08 +02005596 blocker = g_new0(BdrvOpBlocker, 1);
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005597 blocker->reason = reason;
5598 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5599}
5600
5601void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5602{
5603 BdrvOpBlocker *blocker, *next;
5604 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5605 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5606 if (blocker->reason == reason) {
5607 QLIST_REMOVE(blocker, list);
5608 g_free(blocker);
5609 }
5610 }
5611}
5612
5613void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5614{
5615 int i;
5616 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5617 bdrv_op_block(bs, i, reason);
5618 }
5619}
5620
5621void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5622{
5623 int i;
5624 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5625 bdrv_op_unblock(bs, i, reason);
5626 }
5627}
5628
5629bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5630{
5631 int i;
5632
5633 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5634 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5635 return false;
5636 }
5637 }
5638 return true;
5639}
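/* Illustrative sketch, not actual block.c code: how a long-running user of a
 * device might employ the op blocker API.  The helper names, the static
 * blocker variable and the choice of BLOCK_OP_TYPE_RESIZE are illustrative. */
static Error *example_blocker;

static void example_user_start(BlockDriverState *bs)
{
    error_setg(&example_blocker, "device is in use by the example job");
    bdrv_op_block(bs, BLOCK_OP_TYPE_RESIZE, example_blocker);
}

static int example_try_resize(BlockDriverState *bs)
{
    Error *local_err = NULL;

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, &local_err)) {
        /* "Device '...' is busy: device is in use by the example job" */
        error_free(local_err);
        return -EBUSY;
    }
    return 0;
}

static void example_user_stop(BlockDriverState *bs)
{
    bdrv_op_unblock(bs, BLOCK_OP_TYPE_RESIZE, example_blocker);
    error_free(example_blocker);    /* unblocking does not free the reason */
    example_blocker = NULL;
}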
5640
Luiz Capitulino28a72822011-09-26 17:43:50 -03005641void bdrv_iostatus_enable(BlockDriverState *bs)
5642{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005643 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005644 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005645}
5646
5647/* The I/O status is only enabled if the drive explicitly
5648 * enables it _and_ the VM is configured to stop on errors */
5649bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5650{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005651 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005652 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5653 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5654 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005655}
5656
5657void bdrv_iostatus_disable(BlockDriverState *bs)
5658{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005659 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005660}
5661
5662void bdrv_iostatus_reset(BlockDriverState *bs)
5663{
5664 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005665 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005666 if (bs->job) {
5667 block_job_iostatus_reset(bs->job);
5668 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005669 }
5670}
5671
Luiz Capitulino28a72822011-09-26 17:43:50 -03005672void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5673{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005674 assert(bdrv_iostatus_is_enabled(bs));
5675 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005676 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5677 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005678 }
5679}
5680
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005681void bdrv_img_create(const char *filename, const char *fmt,
5682 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005683 char *options, uint64_t img_size, int flags,
5684 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005685{
Chunyan Liu83d05212014-06-05 17:20:51 +08005686 QemuOptsList *create_opts = NULL;
5687 QemuOpts *opts = NULL;
5688 const char *backing_fmt, *backing_file;
5689 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005690 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005691 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005692 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005693 int ret = 0;
5694
5695 /* Find driver and parse its options */
5696 drv = bdrv_find_format(fmt);
5697 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005698 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005699 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005700 }
5701
Max Reitzb65a5e12015-02-05 13:58:12 -05005702 proto_drv = bdrv_find_protocol(filename, true, errp);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005703 if (!proto_drv) {
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005704 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005705 }
5706
Max Reitzc6149722014-12-02 18:32:45 +01005707 if (!drv->create_opts) {
5708 error_setg(errp, "Format driver '%s' does not support image creation",
5709 drv->format_name);
5710 return;
5711 }
5712
5713 if (!proto_drv->create_opts) {
5714 error_setg(errp, "Protocol driver '%s' does not support image creation",
5715 proto_drv->format_name);
5716 return;
5717 }
5718
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005719 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5720 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005721
5722 /* Create parameter list with default values */
Chunyan Liu83d05212014-06-05 17:20:51 +08005723 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01005724 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005725
5726 /* Parse -o options */
5727 if (options) {
Markus Armbrusterdc523cd342015-02-12 18:37:11 +01005728 qemu_opts_do_parse(opts, options, NULL, &local_err);
5729 if (local_err) {
5730 error_report_err(local_err);
5731 local_err = NULL;
Chunyan Liu83d05212014-06-05 17:20:51 +08005732 error_setg(errp, "Invalid options for file format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005733 goto out;
5734 }
5735 }
5736
5737 if (base_filename) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005738 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005739 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005740 error_setg(errp, "Backing file not supported for file format '%s'",
5741 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005742 goto out;
5743 }
5744 }
5745
5746 if (base_fmt) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005747 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005748 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005749 error_setg(errp, "Backing file format not supported for file "
5750 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005751 goto out;
5752 }
5753 }
5754
Chunyan Liu83d05212014-06-05 17:20:51 +08005755 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5756 if (backing_file) {
5757 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005758 error_setg(errp, "Error: Trying to create an image with the "
5759 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005760 goto out;
5761 }
5762 }
5763
Chunyan Liu83d05212014-06-05 17:20:51 +08005764 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5765 if (backing_fmt) {
5766 backing_drv = bdrv_find_format(backing_fmt);
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005767 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005768 error_setg(errp, "Unknown backing file format '%s'",
Chunyan Liu83d05212014-06-05 17:20:51 +08005769 backing_fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005770 goto out;
5771 }
5772 }
5773
5774 // The size for the image must always be specified, with one exception:
5775 // If we are using a backing file, we can obtain the size from there
Chunyan Liu83d05212014-06-05 17:20:51 +08005776 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5777 if (size == -1) {
5778 if (backing_file) {
Max Reitz66f6b812013-12-03 14:57:52 +01005779 BlockDriverState *bs;
Max Reitz29168012014-11-26 17:20:27 +01005780 char *full_backing = g_new0(char, PATH_MAX);
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005781 int64_t size;
Paolo Bonzini63090da2012-04-12 14:01:03 +02005782 int back_flags;
5783
Max Reitz29168012014-11-26 17:20:27 +01005784 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
5785 full_backing, PATH_MAX,
5786 &local_err);
5787 if (local_err) {
5788 g_free(full_backing);
5789 goto out;
5790 }
5791
Paolo Bonzini63090da2012-04-12 14:01:03 +02005792 /* backing files always opened read-only */
5793 back_flags =
5794 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005795
Max Reitzf67503e2014-02-18 18:33:05 +01005796 bs = NULL;
Max Reitz29168012014-11-26 17:20:27 +01005797 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005798 backing_drv, &local_err);
Max Reitz29168012014-11-26 17:20:27 +01005799 g_free(full_backing);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005800 if (ret < 0) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005801 goto out;
5802 }
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005803 size = bdrv_getlength(bs);
5804 if (size < 0) {
5805 error_setg_errno(errp, -size, "Could not get size of '%s'",
5806 backing_file);
5807 bdrv_unref(bs);
5808 goto out;
5809 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005810
Markus Armbruster39101f22015-02-12 16:46:36 +01005811 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
Max Reitz66f6b812013-12-03 14:57:52 +01005812
5813 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005814 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005815 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005816 goto out;
5817 }
5818 }
5819
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005820 if (!quiet) {
Fam Zheng43c5d8f2014-12-09 15:38:04 +08005821 printf("Formatting '%s', fmt=%s", filename, fmt);
5822 qemu_opts_print(opts, " ");
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005823 puts("");
5824 }
Chunyan Liu83d05212014-06-05 17:20:51 +08005825
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005826 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08005827
Max Reitzcc84d902013-09-06 17:14:26 +02005828 if (ret == -EFBIG) {
5829 /* This is generally a better message than whatever the driver would
5830 * deliver (especially because of the cluster_size_hint), since that
5831 * is most probably not much different from "image too large". */
5832 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08005833 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02005834 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005835 }
Max Reitzcc84d902013-09-06 17:14:26 +02005836 error_setg(errp, "The image size is too large for file format '%s'"
5837 "%s", fmt, cluster_size_hint);
5838 error_free(local_err);
5839 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005840 }
5841
5842out:
Chunyan Liu83d05212014-06-05 17:20:51 +08005843 qemu_opts_del(opts);
5844 qemu_opts_free(create_opts);
Markus Armbruster84d18f02014-01-30 15:07:28 +01005845 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005846 error_propagate(errp, local_err);
5847 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005848}
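/* Illustrative sketch, not an actual block.c function: a minimal call to
 * bdrv_img_create() creating a 64 MiB qcow2 image with no backing file.
 * The file name, size and helper name are arbitrary example values. */
static void example_create_image(Error **errp)
{
    Error *local_err = NULL;

    bdrv_img_create("/tmp/example.qcow2", "qcow2",
                    NULL, NULL,           /* no backing file / format */
                    NULL,                 /* no "-o" option string */
                    64 * 1024 * 1024,     /* image size in bytes */
                    0,                    /* flags, used for the backing file */
                    &local_err, true);    /* quiet */
    if (local_err) {
        error_propagate(errp, local_err);
    }
}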
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005849
5850AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5851{
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005852 return bs->aio_context;
5853}
5854
5855void bdrv_detach_aio_context(BlockDriverState *bs)
5856{
Max Reitz33384422014-06-20 21:57:33 +02005857 BdrvAioNotifier *baf;
5858
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005859 if (!bs->drv) {
5860 return;
5861 }
5862
Max Reitz33384422014-06-20 21:57:33 +02005863 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
5864 baf->detach_aio_context(baf->opaque);
5865 }
5866
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +02005867 if (bs->io_limits_enabled) {
5868 throttle_detach_aio_context(&bs->throttle_state);
5869 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005870 if (bs->drv->bdrv_detach_aio_context) {
5871 bs->drv->bdrv_detach_aio_context(bs);
5872 }
5873 if (bs->file) {
5874 bdrv_detach_aio_context(bs->file);
5875 }
5876 if (bs->backing_hd) {
5877 bdrv_detach_aio_context(bs->backing_hd);
5878 }
5879
5880 bs->aio_context = NULL;
5881}
5882
5883void bdrv_attach_aio_context(BlockDriverState *bs,
5884 AioContext *new_context)
5885{
Max Reitz33384422014-06-20 21:57:33 +02005886 BdrvAioNotifier *ban;
5887
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005888 if (!bs->drv) {
5889 return;
5890 }
5891
5892 bs->aio_context = new_context;
5893
5894 if (bs->backing_hd) {
5895 bdrv_attach_aio_context(bs->backing_hd, new_context);
5896 }
5897 if (bs->file) {
5898 bdrv_attach_aio_context(bs->file, new_context);
5899 }
5900 if (bs->drv->bdrv_attach_aio_context) {
5901 bs->drv->bdrv_attach_aio_context(bs, new_context);
5902 }
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +02005903 if (bs->io_limits_enabled) {
5904 throttle_attach_aio_context(&bs->throttle_state, new_context);
5905 }
Max Reitz33384422014-06-20 21:57:33 +02005906
5907 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
5908 ban->attached_aio_context(new_context, ban->opaque);
5909 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005910}
5911
5912void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
5913{
5914 bdrv_drain_all(); /* ensure there are no in-flight requests */
5915
5916 bdrv_detach_aio_context(bs);
5917
5918 /* This function executes in the old AioContext so acquire the new one in
5919 * case it runs in a different thread.
5920 */
5921 aio_context_acquire(new_context);
5922 bdrv_attach_aio_context(bs, new_context);
5923 aio_context_release(new_context);
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005924}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005925
Max Reitz33384422014-06-20 21:57:33 +02005926void bdrv_add_aio_context_notifier(BlockDriverState *bs,
5927 void (*attached_aio_context)(AioContext *new_context, void *opaque),
5928 void (*detach_aio_context)(void *opaque), void *opaque)
5929{
5930 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
5931 *ban = (BdrvAioNotifier){
5932 .attached_aio_context = attached_aio_context,
5933 .detach_aio_context = detach_aio_context,
5934 .opaque = opaque
5935 };
5936
5937 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
5938}
5939
5940void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
5941 void (*attached_aio_context)(AioContext *,
5942 void *),
5943 void (*detach_aio_context)(void *),
5944 void *opaque)
5945{
5946 BdrvAioNotifier *ban, *ban_next;
5947
5948 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5949 if (ban->attached_aio_context == attached_aio_context &&
5950 ban->detach_aio_context == detach_aio_context &&
5951 ban->opaque == opaque)
5952 {
5953 QLIST_REMOVE(ban, list);
5954 g_free(ban);
5955
5956 return;
5957 }
5958 }
5959
5960 abort();
5961}
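/* Illustrative sketch, not actual block.c code: registering a pair of
 * AioContext notifiers so that external state (timers, bottom halves, ...)
 * can follow the BDS when it is moved between threads.  The struct and
 * function names are hypothetical. */
typedef struct ExampleAioUser {
    BlockDriverState *bs;
    AioContext *ctx;
} ExampleAioUser;

static void example_attached_aio_context(AioContext *new_context, void *opaque)
{
    ExampleAioUser *s = opaque;
    s->ctx = new_context;   /* re-create timers/BHs in new_context here */
}

static void example_detach_aio_context(void *opaque)
{
    ExampleAioUser *s = opaque;
    s->ctx = NULL;          /* tear down anything tied to the old context */
}

static void example_register(ExampleAioUser *s)
{
    s->ctx = bdrv_get_aio_context(s->bs);
    bdrv_add_aio_context_notifier(s->bs, example_attached_aio_context,
                                  example_detach_aio_context, s);
}

static void example_unregister(ExampleAioUser *s)
{
    /* must pass exactly the same triple that was registered */
    bdrv_remove_aio_context_notifier(s->bs, example_attached_aio_context,
                                     example_detach_aio_context, s);
}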
5962
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005963void bdrv_add_before_write_notifier(BlockDriverState *bs,
5964 NotifierWithReturn *notifier)
5965{
5966 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5967}
Max Reitz6f176b42013-09-03 10:09:50 +02005968
Max Reitz77485432014-10-27 11:12:50 +01005969int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
5970 BlockDriverAmendStatusCB *status_cb)
Max Reitz6f176b42013-09-03 10:09:50 +02005971{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005972 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02005973 return -ENOTSUP;
5974 }
Max Reitz77485432014-10-27 11:12:50 +01005975 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
Max Reitz6f176b42013-09-03 10:09:50 +02005976}
Benoît Canetf6186f42013-10-02 14:33:48 +02005977
Benoît Canetb5042a32014-03-03 19:11:34 +01005978/* This function will be called by the bdrv_recurse_is_first_non_filter method
5979 * of block filters and by bdrv_is_first_non_filter.
5980 * It is used to test whether the given bs is the candidate, or whether to
5981 * recurse further down the node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01005982 */
Benoît Canet212a5a82014-01-23 21:31:36 +01005983bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5984 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005985{
Benoît Canetb5042a32014-03-03 19:11:34 +01005986    /* return false if the basic checks fail */
5987 if (!bs || !bs->drv) {
5988 return false;
5989 }
5990
5991 /* the code reached a non block filter driver -> check if the bs is
5992 * the same as the candidate. It's the recursion termination condition.
5993 */
5994 if (!bs->drv->is_filter) {
5995 return bs == candidate;
5996 }
5997 /* Down this path the driver is a block filter driver */
5998
5999 /* If the block filter recursion method is defined use it to recurse down
6000 * the node graph.
6001 */
6002 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01006003 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
6004 }
6005
Benoît Canetb5042a32014-03-03 19:11:34 +01006006    /* the driver is a block filter but does not allow recursion -> return false
6007     */
6008 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01006009}
6010
6011/* This function checks whether the candidate is the first non-filter bs down its
6012 * bs chain. Since we don't have pointers to parents, it explores all bs chains
6013 * from the top. Some filters can choose not to pass down the recursion.
6014 */
6015bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6016{
6017 BlockDriverState *bs;
6018
6019 /* walk down the bs forest recursively */
6020 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6021 bool perm;
6022
Benoît Canetb5042a32014-03-03 19:11:34 +01006023 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01006024 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01006025
6026 /* candidate is the first non filter */
6027 if (perm) {
6028 return true;
6029 }
6030 }
6031
6032 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02006033}
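/* Illustrative sketch, not actual block.c code: how a single-child block
 * filter might forward the recursion to its child.  "example-filter" and the
 * function name are hypothetical; real filters such as quorum and blkverify
 * implement similar callbacks over their own children. */
static bool example_filter_recurse_is_first_non_filter(BlockDriverState *bs,
                                                       BlockDriverState *candidate)
{
    return bdrv_recurse_is_first_non_filter(bs->file, candidate);
}

static BlockDriver bdrv_example_filter = {
    .format_name                      = "example-filter",
    .is_filter                        = true,
    .bdrv_recurse_is_first_non_filter = example_filter_recurse_is_first_non_filter,
};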
Benoît Canet09158f02014-06-27 18:25:25 +02006034
6035BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
6036{
6037 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006038 AioContext *aio_context;
6039
Benoît Canet09158f02014-06-27 18:25:25 +02006040 if (!to_replace_bs) {
6041 error_setg(errp, "Node name '%s' not found", node_name);
6042 return NULL;
6043 }
6044
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006045 aio_context = bdrv_get_aio_context(to_replace_bs);
6046 aio_context_acquire(aio_context);
6047
Benoît Canet09158f02014-06-27 18:25:25 +02006048 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006049 to_replace_bs = NULL;
6050 goto out;
Benoît Canet09158f02014-06-27 18:25:25 +02006051 }
6052
6053    /* We don't want an arbitrary node of the BDS chain to be replaced, only the
6054     * topmost non-filter, in order to prevent data corruption.
6055     * Another benefit is that this test excludes backing files, which are
6056     * blocked by the backing blockers.
6057 */
6058 if (!bdrv_is_first_non_filter(to_replace_bs)) {
6059 error_setg(errp, "Only top most non filter can be replaced");
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006060 to_replace_bs = NULL;
6061 goto out;
Benoît Canet09158f02014-06-27 18:25:25 +02006062 }
6063
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006064out:
6065 aio_context_release(aio_context);
Benoît Canet09158f02014-06-27 18:25:25 +02006066 return to_replace_bs;
6067}
Ming Lei448ad912014-07-04 18:04:33 +08006068
6069void bdrv_io_plug(BlockDriverState *bs)
6070{
6071 BlockDriver *drv = bs->drv;
6072 if (drv && drv->bdrv_io_plug) {
6073 drv->bdrv_io_plug(bs);
6074 } else if (bs->file) {
6075 bdrv_io_plug(bs->file);
6076 }
6077}
6078
6079void bdrv_io_unplug(BlockDriverState *bs)
6080{
6081 BlockDriver *drv = bs->drv;
6082 if (drv && drv->bdrv_io_unplug) {
6083 drv->bdrv_io_unplug(bs);
6084 } else if (bs->file) {
6085 bdrv_io_unplug(bs->file);
6086 }
6087}
6088
6089void bdrv_flush_io_queue(BlockDriverState *bs)
6090{
6091 BlockDriver *drv = bs->drv;
6092 if (drv && drv->bdrv_flush_io_queue) {
6093 drv->bdrv_flush_io_queue(bs);
6094 } else if (bs->file) {
6095 bdrv_flush_io_queue(bs->file);
6096 }
6097}
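/* Illustrative sketch, not an actual block.c function: batching several AIO
 * submissions between plug and unplug so a capable driver (e.g. linux-aio)
 * can issue them to the host in one go.  The helper name and parameters are
 * hypothetical. */
static void example_submit_batch(BlockDriverState *bs, QEMUIOVector *qiov[],
                                 int64_t sector_num[], int nb_sectors[], int n,
                                 BlockCompletionFunc *cb, void *opaque)
{
    int i;

    bdrv_io_plug(bs);       /* start queuing, if the driver supports it */
    for (i = 0; i < n; i++) {
        bdrv_aio_writev(bs, sector_num[i], qiov[i], nb_sectors[i], cb, opaque);
    }
    bdrv_io_unplug(bs);     /* kick the queued requests out to the host */
}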
Max Reitz91af7012014-07-18 20:24:56 +02006098
6099static bool append_open_options(QDict *d, BlockDriverState *bs)
6100{
6101 const QDictEntry *entry;
6102 bool found_any = false;
6103
6104 for (entry = qdict_first(bs->options); entry;
6105 entry = qdict_next(bs->options, entry))
6106 {
6107 /* Only take options for this level and exclude all non-driver-specific
6108 * options */
6109 if (!strchr(qdict_entry_key(entry), '.') &&
6110 strcmp(qdict_entry_key(entry), "node-name"))
6111 {
6112 qobject_incref(qdict_entry_value(entry));
6113 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
6114 found_any = true;
6115 }
6116 }
6117
6118 return found_any;
6119}
6120
6121/* Updates the following BDS fields:
6122 * - exact_filename: A filename which may be used for opening a block device
6123 * which (mostly) equals the given BDS (even without any
6124 * other options; so reading and writing must return the same
6125 * results, but caching etc. may be different)
6126 * - full_open_options: Options which, when given when opening a block device
6127 * (without a filename), result in a BDS (mostly)
6128 * equalling the given one
6129 * - filename: If exact_filename is set, it is copied here. Otherwise,
6130 * full_open_options is converted to a JSON object, prefixed with
6131 * "json:" (for use through the JSON pseudo protocol) and put here.
6132 */
6133void bdrv_refresh_filename(BlockDriverState *bs)
6134{
6135 BlockDriver *drv = bs->drv;
6136 QDict *opts;
6137
6138 if (!drv) {
6139 return;
6140 }
6141
6142 /* This BDS's file name will most probably depend on its file's name, so
6143 * refresh that first */
6144 if (bs->file) {
6145 bdrv_refresh_filename(bs->file);
6146 }
6147
6148 if (drv->bdrv_refresh_filename) {
6149 /* Obsolete information is of no use here, so drop the old file name
6150 * information before refreshing it */
6151 bs->exact_filename[0] = '\0';
6152 if (bs->full_open_options) {
6153 QDECREF(bs->full_open_options);
6154 bs->full_open_options = NULL;
6155 }
6156
6157 drv->bdrv_refresh_filename(bs);
6158 } else if (bs->file) {
6159 /* Try to reconstruct valid information from the underlying file */
6160 bool has_open_options;
6161
6162 bs->exact_filename[0] = '\0';
6163 if (bs->full_open_options) {
6164 QDECREF(bs->full_open_options);
6165 bs->full_open_options = NULL;
6166 }
6167
6168 opts = qdict_new();
6169 has_open_options = append_open_options(opts, bs);
6170
6171 /* If no specific options have been given for this BDS, the filename of
6172 * the underlying file should suffice for this one as well */
6173 if (bs->file->exact_filename[0] && !has_open_options) {
6174 strcpy(bs->exact_filename, bs->file->exact_filename);
6175 }
6176 /* Reconstructing the full options QDict is simple for most format block
6177 * drivers, as long as the full options are known for the underlying
6178 * file BDS. The full options QDict of that file BDS should somehow
6179 * contain a representation of the filename, therefore the following
6180 * suffices without querying the (exact_)filename of this BDS. */
6181 if (bs->file->full_open_options) {
6182 qdict_put_obj(opts, "driver",
6183 QOBJECT(qstring_from_str(drv->format_name)));
6184 QINCREF(bs->file->full_open_options);
6185 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
6186
6187 bs->full_open_options = opts;
6188 } else {
6189 QDECREF(opts);
6190 }
6191 } else if (!bs->full_open_options && qdict_size(bs->options)) {
6192 /* There is no underlying file BDS (at least referenced by BDS.file),
6193 * so the full options QDict should be equal to the options given
6194 * specifically for this block device when it was opened (plus the
6195 * driver specification).
6196 * Because those options don't change, there is no need to update
6197 * full_open_options when it's already set. */
6198
6199 opts = qdict_new();
6200 append_open_options(opts, bs);
6201 qdict_put_obj(opts, "driver",
6202 QOBJECT(qstring_from_str(drv->format_name)));
6203
6204 if (bs->exact_filename[0]) {
6205 /* This may not work for all block protocol drivers (some may
6206 * require this filename to be parsed), but we have to find some
6207 * default solution here, so just include it. If some block driver
6208 * does not support pure options without any filename at all or
6209 * needs some special format of the options QDict, it needs to
6210 * implement the driver-specific bdrv_refresh_filename() function.
6211 */
6212 qdict_put_obj(opts, "filename",
6213 QOBJECT(qstring_from_str(bs->exact_filename)));
6214 }
6215
6216 bs->full_open_options = opts;
6217 }
6218
6219 if (bs->exact_filename[0]) {
6220 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
6221 } else if (bs->full_open_options) {
6222 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
6223 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
6224 qstring_get_str(json));
6225 QDECREF(json);
6226 }
6227}
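/* Illustrative sketch, not actual block.c code: what a driver-specific
 * bdrv_refresh_filename() implementation might look like for an imaginary
 * "example" protocol driver whose state can be described by a plain string. */
static void example_refresh_filename(BlockDriverState *bs)
{
    QDict *opts = qdict_new();

    /* a plain pseudo-filename is enough to reopen this imaginary device;
     * a real driver would build it from its own state */
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), "example://device0");

    qdict_put_obj(opts, "driver",
                  QOBJECT(qstring_from_str(bs->drv->format_name)));
    qdict_put_obj(opts, "filename",
                  QOBJECT(qstring_from_str(bs->exact_filename)));
    bs->full_open_options = opts;
}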
Benoît Canet5366d0c2014-09-05 15:46:18 +02006228
6229/* The purpose of this accessor function is to allow the device models to access
6230 * the BlockAcctStats structure embedded inside a BlockDriverState without
6231 * being aware of the BlockDriverState structure layout.
6232 * It will go away once the BlockAcctStats structure is moved inside
6233 * the device models.
6234 */
6235BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6236{
6237 return &bs->stats;
6238}