/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/block-backend.h"
#include "sysemu/sysemu.h"
#include "sysemu/qtest.h"
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif

struct BdrvDirtyBitmap {
    HBitmap *bitmap;
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque);
static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov,
                                         int nb_sectors,
                                         BdrvRequestFlags flags,
                                         BlockCompletionFunc *cb,
                                         void *opaque,
                                         bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                           int nr_sectors);
static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                             int nr_sectors);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;

#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif

/* throttling disk I/O limits */
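/* Apply a new throttling configuration to @bs and restart any currently
 * queued requests so they are re-evaluated against the new limits. */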
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/* this function drains all the throttled I/Os */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

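/* Tear down I/O throttling for @bs: drain all currently throttled requests
 * and destroy the throttling state. */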
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}

static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}

/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    int clock_type = QEMU_CLOCK_REALTIME;

    if (qtest_enabled()) {
        /* For testing block IO throttling only */
        clock_type = QEMU_CLOCK_VIRTUAL;
    }
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  clock_type,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

/* This function makes an IO wait if needed
 *
 * @bytes:    the number of bytes of the IO
 * @is_write: is the IO a write
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* does this I/O have to wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already throttled,
     * queue the IO */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the IO will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);

    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}

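/* Memory alignment required for data buffers used with @bs; falls back to
 * 4k when the driver (or @bs itself) is not available. */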
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}

/* check if the path starts with "<protocol>:" */
int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}

/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

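/* Build the full name of @backing (a backing file name stored in an image)
 * relative to @backed (the image file name) into @dest.  Protocol-prefixed,
 * absolute and empty backing names are copied as they are; relative names
 * cannot be resolved against an empty or "json:" @backed and set @errp. */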
void bdrv_get_full_backing_filename_from_filename(const char *backed,
                                                  const char *backing,
                                                  char *dest, size_t sz,
                                                  Error **errp)
{
    if (backing[0] == '\0' || path_has_protocol(backing) ||
        path_is_absolute(backing))
    {
        pstrcpy(dest, sz, backing);
    } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
        error_setg(errp, "Cannot use relative backing file names for '%s'",
                   backed);
    } else {
        path_combine(dest, sz, backed, backing);
    }
}

void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
                                    Error **errp)
{
    char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;

    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
                                                 dest, sz, errp);
}

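/* Register a block driver with the block layer.  Missing coroutine and AIO
 * callbacks are filled in with emulation wrappers before the driver is added
 * to the global driver list. */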
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

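/* Like bdrv_new(), but also insert the new BDS into the global list of
 * top-level (root) BlockDriverStates. */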
BlockDriverState *bdrv_new_root(void)
{
    BlockDriverState *bs = bdrv_new();

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}

BlockDriverState *bdrv_new(void)
{
    BlockDriverState *bs;
    int i;

    bs = g_new0(BlockDriverState, 1);
    QLIST_INIT(&bs->dirty_bitmaps);
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();

    return bs;
}

void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

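/* Context shared between bdrv_create() and the coroutine that performs the
 * actual image creation. */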
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QemuOpts *opts;
    int ret;
    Error *err;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}

int bdrv_create(BlockDriver *drv, const char* filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation",
                   drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}

int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true, errp);
    if (drv == NULL) {
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

Kevin Wolfd34682c2013-12-11 19:26:16 +0100531{
532 BlockDriver *drv = bs->drv;
Kevin Wolf3baca892014-07-16 17:48:16 +0200533 Error *local_err = NULL;
Kevin Wolfd34682c2013-12-11 19:26:16 +0100534
535 memset(&bs->bl, 0, sizeof(bs->bl));
536
Kevin Wolf466ad822013-12-11 19:50:32 +0100537 if (!drv) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200538 return;
Kevin Wolf466ad822013-12-11 19:50:32 +0100539 }
540
541 /* Take some limits from the children as a default */
542 if (bs->file) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200543 bdrv_refresh_limits(bs->file, &local_err);
544 if (local_err) {
545 error_propagate(errp, local_err);
546 return;
547 }
Kevin Wolf466ad822013-12-11 19:50:32 +0100548 bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
Peter Lieven2647fab2014-10-27 10:18:44 +0100549 bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
Kevin Wolf339064d2013-11-28 10:23:32 +0100550 bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
551 } else {
552 bs->bl.opt_mem_alignment = 512;
Kevin Wolf466ad822013-12-11 19:50:32 +0100553 }
554
555 if (bs->backing_hd) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200556 bdrv_refresh_limits(bs->backing_hd, &local_err);
557 if (local_err) {
558 error_propagate(errp, local_err);
559 return;
560 }
Kevin Wolf466ad822013-12-11 19:50:32 +0100561 bs->bl.opt_transfer_length =
562 MAX(bs->bl.opt_transfer_length,
563 bs->backing_hd->bl.opt_transfer_length);
Peter Lieven2647fab2014-10-27 10:18:44 +0100564 bs->bl.max_transfer_length =
565 MIN_NON_ZERO(bs->bl.max_transfer_length,
566 bs->backing_hd->bl.max_transfer_length);
Kevin Wolf339064d2013-11-28 10:23:32 +0100567 bs->bl.opt_mem_alignment =
568 MAX(bs->bl.opt_mem_alignment,
569 bs->backing_hd->bl.opt_mem_alignment);
Kevin Wolf466ad822013-12-11 19:50:32 +0100570 }
571
572 /* Then let the driver override it */
573 if (drv->bdrv_refresh_limits) {
Kevin Wolf3baca892014-07-16 17:48:16 +0200574 drv->bdrv_refresh_limits(bs, errp);
Kevin Wolfd34682c2013-12-11 19:26:16 +0100575 }
Kevin Wolfd34682c2013-12-11 19:26:16 +0100576}
577
Ekaterina Tumanova892b7de2015-02-16 12:47:54 +0100578/**
579 * Try to get @bs's logical and physical block size.
580 * On success, store them in @bsz struct and return 0.
581 * On failure return -errno.
582 * @bs must not be empty.
583 */
584int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
585{
586 BlockDriver *drv = bs->drv;
587
588 if (drv && drv->bdrv_probe_blocksizes) {
589 return drv->bdrv_probe_blocksizes(bs, bsz);
590 }
591
592 return -ENOTSUP;
593}
594
595/**
596 * Try to get @bs's geometry (cyls, heads, sectors).
597 * On success, store them in @geo struct and return 0.
598 * On failure return -errno.
599 * @bs must not be empty.
600 */
601int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
602{
603 BlockDriver *drv = bs->drv;
604
605 if (drv && drv->bdrv_probe_geometry) {
606 return drv->bdrv_probe_geometry(bs, geo);
607 }
608
609 return -ENOTSUP;
610}
611
Jim Meyeringeba25052012-05-28 09:27:54 +0200612/*
613 * Create a uniquely-named empty temporary file.
614 * Return 0 upon success, otherwise a negative errno value.
615 */
616int get_tmp_filename(char *filename, int size)
617{
bellardd5249392004-08-03 21:14:23 +0000618#ifdef _WIN32
bellard3b9f94e2007-01-07 17:27:07 +0000619 char temp_dir[MAX_PATH];
Jim Meyeringeba25052012-05-28 09:27:54 +0200620 /* GetTempFileName requires that its output buffer (4th param)
621 have length MAX_PATH or greater. */
622 assert(size >= MAX_PATH);
623 return (GetTempPath(MAX_PATH, temp_dir)
624 && GetTempFileName(temp_dir, "qem", 0, filename)
625 ? 0 : -GetLastError());
bellardd5249392004-08-03 21:14:23 +0000626#else
bellardea2384d2004-08-01 21:59:26 +0000627 int fd;
blueswir17ccfb2e2008-09-14 06:45:34 +0000628 const char *tmpdir;
aurel320badc1e2008-03-10 00:05:34 +0000629 tmpdir = getenv("TMPDIR");
Amit Shah69bef792014-02-26 15:12:37 +0530630 if (!tmpdir) {
631 tmpdir = "/var/tmp";
632 }
Jim Meyeringeba25052012-05-28 09:27:54 +0200633 if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
634 return -EOVERFLOW;
635 }
bellardea2384d2004-08-01 21:59:26 +0000636 fd = mkstemp(filename);
Dunrong Huangfe235a02012-09-05 21:26:22 +0800637 if (fd < 0) {
638 return -errno;
639 }
640 if (close(fd) != 0) {
641 unlink(filename);
Jim Meyeringeba25052012-05-28 09:27:54 +0200642 return -errno;
643 }
644 return 0;
bellardd5249392004-08-03 21:14:23 +0000645#endif
Jim Meyeringeba25052012-05-28 09:27:54 +0200646}
bellardea2384d2004-08-01 21:59:26 +0000647
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200648/*
649 * Detect host devices. By convention, /dev/cdrom[N] is always
650 * recognized as a host CDROM.
651 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200652static BlockDriver *find_hdev_driver(const char *filename)
653{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200654 int score_max = 0, score;
655 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200656
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100657 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200658 if (d->bdrv_probe_device) {
659 score = d->bdrv_probe_device(filename);
660 if (score > score_max) {
661 score_max = score;
662 drv = d;
663 }
664 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200665 }
666
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200667 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200668}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200669
Kevin Wolf98289622013-07-10 15:47:39 +0200670BlockDriver *bdrv_find_protocol(const char *filename,
Max Reitzb65a5e12015-02-05 13:58:12 -0500671 bool allow_protocol_prefix,
672 Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200673{
674 BlockDriver *drv1;
675 char protocol[128];
676 int len;
677 const char *p;
678
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200679 /* TODO Drivers without bdrv_file_open must be specified explicitly */
680
Christoph Hellwig39508e72010-06-23 12:25:17 +0200681 /*
682 * XXX(hch): we really should not let host device detection
683 * override an explicit protocol specification, but moving this
684 * later breaks access to device names with colons in them.
685 * Thanks to the brain-dead persistent naming schemes on udev-
686 * based Linux systems those actually are quite common.
687 */
688 drv1 = find_hdev_driver(filename);
689 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200690 return drv1;
691 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200692
Kevin Wolf98289622013-07-10 15:47:39 +0200693 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
Max Reitzef810432014-12-02 18:32:42 +0100694 return &bdrv_file;
Christoph Hellwig39508e72010-06-23 12:25:17 +0200695 }
Kevin Wolf98289622013-07-10 15:47:39 +0200696
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000697 p = strchr(filename, ':');
698 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200699 len = p - filename;
700 if (len > sizeof(protocol) - 1)
701 len = sizeof(protocol) - 1;
702 memcpy(protocol, filename, len);
703 protocol[len] = '\0';
704 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
705 if (drv1->protocol_name &&
706 !strcmp(drv1->protocol_name, protocol)) {
707 return drv1;
708 }
709 }
Max Reitzb65a5e12015-02-05 13:58:12 -0500710
711 error_setg(errp, "Unknown protocol '%s'", protocol);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200712 return NULL;
713}
714
Markus Armbrusterc6684242014-11-20 16:27:10 +0100715/*
716 * Guess image format by probing its contents.
717 * This is not a good idea when your image is raw (CVE-2008-2004), but
718 * we do it anyway for backward compatibility.
719 *
720 * @buf contains the image's first @buf_size bytes.
Kevin Wolf7cddd372014-11-20 16:27:11 +0100721 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
722 * but can be smaller if the image file is smaller)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100723 * @filename is its filename.
724 *
725 * For all block drivers, call the bdrv_probe() method to get its
726 * probing score.
727 * Return the first block driver with the highest probing score.
728 */
Kevin Wolf38f3ef52014-11-20 16:27:12 +0100729BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
730 const char *filename)
Markus Armbrusterc6684242014-11-20 16:27:10 +0100731{
732 int score_max = 0, score;
733 BlockDriver *drv = NULL, *d;
734
735 QLIST_FOREACH(d, &bdrv_drivers, list) {
736 if (d->bdrv_probe) {
737 score = d->bdrv_probe(buf, buf_size, filename);
738 if (score > score_max) {
739 score_max = score;
740 drv = d;
741 }
742 }
743 }
744
745 return drv;
746}
747
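/* Probe the image content of @bs to select a format driver; scsi-generic
 * devices and empty drives get the raw driver. */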
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    BlockDriver *drv;
    uint8_t buf[BLOCK_PROBE_BUF_SIZE];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        *pdrv = &bdrv_raw;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    drv = bdrv_probe_all(buf, ret, filename);
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 * Return 0 on success, -errno on error.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}

/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

/*
 * Returns the flags that a temporary snapshot should get, based on the
 * originally requested flags (the originally requested image will have flags
 * like a backing file)
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
}

/*
 * Returns the flags that bs->file should get, based on the given flags for
 * the parent BDS
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);

    return flags;
}

/*
 * Returns the flags that bs->backing_hd should get, based on the given flags
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);

    return flags;
}

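/* Compute the flags actually passed to the driver's open function, stripping
 * flags that are handled by the generic block layer. */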
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}

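/* Validate @node_name, check it against existing device and node names, and
 * attach it to @bs, inserting the BDS into the graph node list. */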
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* Check for empty string or invalid characters */
    if (!id_wellformed(node_name)) {
        error_setg(errp, "Invalid node name");
        return;
    }

    /* takes care of avoiding namespace collisions */
    if (blk_by_name(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}

/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() was called directly with a protocol driver as drv. That
     * layer is already opened, so assign it to bs (while file becomes a
     * closed BlockDriverState) and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                   ? "Driver '%s' can only be used for read-only devices"
                   : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    if (bs->encrypted) {
        error_report("Encrypted images are deprecated");
        error_printf("Support for them will be removed in a future release.\n"
                     "You can use 'qemu-img convert' to convert your image"
                     " to an unencrypted one.\n");
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

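/* Parse a "json:{...}" pseudo-protocol filename into a flattened options
 * QDict; returns NULL and sets @errp on parse errors. */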
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}

/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename, errp);
                if (!drv) {
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}

Fam Zheng8d24cce2014-05-23 21:29:45 +08001226void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1227{
1228
Fam Zheng826b6ca2014-05-23 21:29:47 +08001229 if (bs->backing_hd) {
1230 assert(bs->backing_blocker);
1231 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1232 } else if (backing_hd) {
1233 error_setg(&bs->backing_blocker,
1234 "device is used as backing hd of '%s'",
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001235 bdrv_get_device_name(bs));
Fam Zheng826b6ca2014-05-23 21:29:47 +08001236 }
1237
Fam Zheng8d24cce2014-05-23 21:29:45 +08001238 bs->backing_hd = backing_hd;
1239 if (!backing_hd) {
Fam Zheng826b6ca2014-05-23 21:29:47 +08001240 error_free(bs->backing_blocker);
1241 bs->backing_blocker = NULL;
Fam Zheng8d24cce2014-05-23 21:29:45 +08001242 goto out;
1243 }
1244 bs->open_flags &= ~BDRV_O_NO_BACKING;
1245 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1246 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1247 backing_hd->drv ? backing_hd->drv->format_name : "");
Fam Zheng826b6ca2014-05-23 21:29:47 +08001248
1249 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1250 /* Otherwise we won't be able to commit due to check in bdrv_commit */
Fam Zhengbb000212014-09-11 13:14:00 +08001251 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
Fam Zheng826b6ca2014-05-23 21:29:47 +08001252 bs->backing_blocker);
Fam Zheng8d24cce2014-05-23 21:29:45 +08001253out:
Kevin Wolf3baca892014-07-16 17:48:16 +02001254 bdrv_refresh_limits(bs, NULL);
Fam Zheng8d24cce2014-05-23 21:29:45 +08001255}
1256
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001257/*
1258 * Opens the backing file for a BlockDriverState if not yet open
1259 *
1260 * options is a QDict of options to pass to the block drivers, or NULL for an
1261 * empty set of options. The reference to the QDict is transferred to this
1262 * function (even on failure), so if the caller intends to reuse the dictionary,
1263 * it needs to use QINCREF() before calling bdrv_file_open.
1264 */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001265int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
Paolo Bonzini9156df12012-10-18 16:49:17 +02001266{
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001267 char *backing_filename = g_malloc0(PATH_MAX);
Kevin Wolf317fc442014-04-25 13:27:34 +02001268 int ret = 0;
Fam Zheng8d24cce2014-05-23 21:29:45 +08001269 BlockDriverState *backing_hd;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001270 Error *local_err = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001271
1272 if (bs->backing_hd != NULL) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001273 QDECREF(options);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001274 goto free_exit;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001275 }
1276
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001277 /* NULL means an empty set of options */
1278 if (options == NULL) {
1279 options = qdict_new();
1280 }
1281
Paolo Bonzini9156df12012-10-18 16:49:17 +02001282 bs->open_flags &= ~BDRV_O_NO_BACKING;
Kevin Wolf1cb6f502013-04-12 20:27:07 +02001283 if (qdict_haskey(options, "file.filename")) {
1284 backing_filename[0] = '\0';
1285 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001286 QDECREF(options);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001287 goto free_exit;
Fam Zhengdbecebd2013-09-22 20:05:06 +08001288 } else {
Max Reitz9f074292014-11-26 17:20:26 +01001289 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1290 &local_err);
1291 if (local_err) {
1292 ret = -EINVAL;
1293 error_propagate(errp, local_err);
1294 QDECREF(options);
1295 goto free_exit;
1296 }
Paolo Bonzini9156df12012-10-18 16:49:17 +02001297 }
1298
Kevin Wolf8ee79e72014-06-04 15:09:35 +02001299 if (!bs->drv || !bs->drv->supports_backing) {
1300 ret = -EINVAL;
1301 error_setg(errp, "Driver doesn't support backing files");
1302 QDECREF(options);
1303 goto free_exit;
1304 }
1305
Markus Armbrustere4e99862014-10-07 13:59:03 +02001306 backing_hd = bdrv_new();
Fam Zheng8d24cce2014-05-23 21:29:45 +08001307
Kevin Wolfc5f6e492014-11-25 18:12:42 +01001308 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1309 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
Paolo Bonzini9156df12012-10-18 16:49:17 +02001310 }
1311
Max Reitzf67503e2014-02-18 18:33:05 +01001312 assert(bs->backing_hd == NULL);
Fam Zheng8d24cce2014-05-23 21:29:45 +08001313 ret = bdrv_open(&backing_hd,
Max Reitzddf56362014-02-18 18:33:06 +01001314 *backing_filename ? backing_filename : NULL, NULL, options,
Kevin Wolfc5f6e492014-11-25 18:12:42 +01001315 bdrv_backing_flags(bs->open_flags), NULL, &local_err);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001316 if (ret < 0) {
Fam Zheng8d24cce2014-05-23 21:29:45 +08001317 bdrv_unref(backing_hd);
1318 backing_hd = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001319 bs->open_flags |= BDRV_O_NO_BACKING;
Fam Zhengb04b6b62013-11-08 11:26:49 +08001320 error_setg(errp, "Could not open backing file: %s",
1321 error_get_pretty(local_err));
1322 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001323 goto free_exit;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001324 }
Fam Zheng8d24cce2014-05-23 21:29:45 +08001325 bdrv_set_backing_hd(bs, backing_hd);
Peter Feinerd80ac652014-01-08 19:43:25 +00001326
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001327free_exit:
1328 g_free(backing_filename);
1329 return ret;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001330}
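/* Illustrative sketch (not part of the original code): bdrv_open() below
 * collects all "backing.*" options into their own QDict and hands them to
 * this function; a caller doing the same by hand might look like this.
 * Error handling is condensed for brevity.
 *
 *     QDict *backing_options;
 *     Error *local_err = NULL;
 *
 *     qdict_extract_subqdict(options, &backing_options, "backing.");
 *     if (bdrv_open_backing_file(bs, backing_options, &local_err) < 0) {
 *         // the reference to backing_options has already been consumed
 *         error_free(local_err);
 *     }
 */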
1331
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001332/*
Max Reitzda557aa2013-12-20 19:28:11 +01001333 * Opens a disk image whose options are given as BlockdevRef in another block
1334 * device's options.
1335 *
Max Reitzda557aa2013-12-20 19:28:11 +01001336 * If allow_none is true, no image will be opened if filename is NULL and no
1337 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1338 *
 1339 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1340 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1341 * itself, all options starting with "${bdref_key}." are considered part of the
1342 * BlockdevRef.
1343 *
1344 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001345 *
1346 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001347 */
1348int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1349 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001350 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001351{
1352 QDict *image_options;
1353 int ret;
1354 char *bdref_key_dot;
1355 const char *reference;
1356
Max Reitzf67503e2014-02-18 18:33:05 +01001357 assert(pbs);
1358 assert(*pbs == NULL);
1359
Max Reitzda557aa2013-12-20 19:28:11 +01001360 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1361 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1362 g_free(bdref_key_dot);
1363
1364 reference = qdict_get_try_str(options, bdref_key);
1365 if (!filename && !reference && !qdict_size(image_options)) {
1366 if (allow_none) {
1367 ret = 0;
1368 } else {
1369 error_setg(errp, "A block device must be specified for \"%s\"",
1370 bdref_key);
1371 ret = -EINVAL;
1372 }
Markus Armbrusterb20e61e2014-05-28 11:16:57 +02001373 QDECREF(image_options);
Max Reitzda557aa2013-12-20 19:28:11 +01001374 goto done;
1375 }
1376
Max Reitzf7d9fd82014-02-18 18:33:12 +01001377 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001378
1379done:
1380 qdict_del(options, bdref_key);
1381 return ret;
1382}
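/* Illustrative sketch (assumption, not original code): this is how the
 * protocol layer is opened from bdrv_open() below, with "file" as the
 * bdref_key so that both "file=..." references and flattened
 * "file.filename=..." options are honoured.
 *
 *     BlockDriverState *file = NULL;
 *     Error *local_err = NULL;
 *
 *     ret = bdrv_open_image(&file, filename, options, "file",
 *                           bdrv_inherited_flags(flags), true, &local_err);
 *     if (ret < 0) {
 *         // "file" is still NULL here; propagate local_err to the caller
 *     }
 */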
1383
Chen Gang6b8aeca2014-06-23 23:28:23 +08001384int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
Kevin Wolfb9988752014-04-03 12:09:34 +02001385{
1386 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001387 char *tmp_filename = g_malloc0(PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001388 int64_t total_size;
Chunyan Liu83d05212014-06-05 17:20:51 +08001389 QemuOpts *opts = NULL;
Kevin Wolfb9988752014-04-03 12:09:34 +02001390 QDict *snapshot_options;
1391 BlockDriverState *bs_snapshot;
1392 Error *local_err;
1393 int ret;
1394
1395 /* if snapshot, we create a temporary backing file and open it
1396 instead of opening 'filename' directly */
1397
1398 /* Get the required size from the image */
Kevin Wolff1877432014-04-04 17:07:19 +02001399 total_size = bdrv_getlength(bs);
1400 if (total_size < 0) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001401 ret = total_size;
Kevin Wolff1877432014-04-04 17:07:19 +02001402 error_setg_errno(errp, -total_size, "Could not get image size");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001403 goto out;
Kevin Wolff1877432014-04-04 17:07:19 +02001404 }
Kevin Wolfb9988752014-04-03 12:09:34 +02001405
1406 /* Create the temporary image */
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001407 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
Kevin Wolfb9988752014-04-03 12:09:34 +02001408 if (ret < 0) {
1409 error_setg_errno(errp, -ret, "Could not get temporary filename");
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001410 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001411 }
1412
Max Reitzef810432014-12-02 18:32:42 +01001413 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
Chunyan Liuc282e1f2014-06-05 17:21:11 +08001414 &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01001415 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
Max Reitzef810432014-12-02 18:32:42 +01001416 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08001417 qemu_opts_del(opts);
Kevin Wolfb9988752014-04-03 12:09:34 +02001418 if (ret < 0) {
1419 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1420 "'%s': %s", tmp_filename,
1421 error_get_pretty(local_err));
1422 error_free(local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001423 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001424 }
1425
1426 /* Prepare a new options QDict for the temporary file */
1427 snapshot_options = qdict_new();
1428 qdict_put(snapshot_options, "file.driver",
1429 qstring_from_str("file"));
1430 qdict_put(snapshot_options, "file.filename",
1431 qstring_from_str(tmp_filename));
1432
Markus Armbrustere4e99862014-10-07 13:59:03 +02001433 bs_snapshot = bdrv_new();
Kevin Wolfb9988752014-04-03 12:09:34 +02001434
1435 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
Max Reitzef810432014-12-02 18:32:42 +01001436 flags, &bdrv_qcow2, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001437 if (ret < 0) {
1438 error_propagate(errp, local_err);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001439 goto out;
Kevin Wolfb9988752014-04-03 12:09:34 +02001440 }
1441
1442 bdrv_append(bs_snapshot, bs);
Benoît Canet1ba4b6a2014-04-22 17:05:27 +02001443
1444out:
1445 g_free(tmp_filename);
Chen Gang6b8aeca2014-06-23 23:28:23 +08001446 return ret;
Kevin Wolfb9988752014-04-03 12:09:34 +02001447}
1448
Max Reitzda557aa2013-12-20 19:28:11 +01001449/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001450 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001451 *
1452 * options is a QDict of options to pass to the block drivers, or NULL for an
1453 * empty set of options. The reference to the QDict belongs to the block layer
1454 * after the call (even on failure), so if the caller intends to reuse the
1455 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001456 *
1457 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1458 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001459 *
1460 * The reference parameter may be used to specify an existing block device which
1461 * should be opened. If specified, neither options nor a filename may be given,
1462 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001463 */
Max Reitzddf56362014-02-18 18:33:06 +01001464int bdrv_open(BlockDriverState **pbs, const char *filename,
1465 const char *reference, QDict *options, int flags,
1466 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001467{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001468 int ret;
Max Reitzf67503e2014-02-18 18:33:05 +01001469 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001470 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001471 Error *local_err = NULL;
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001472 int snapshot_flags = 0;
bellard712e7872005-04-28 21:09:32 +00001473
Max Reitzf67503e2014-02-18 18:33:05 +01001474 assert(pbs);
1475
Max Reitzddf56362014-02-18 18:33:06 +01001476 if (reference) {
1477 bool options_non_empty = options ? qdict_size(options) : false;
1478 QDECREF(options);
1479
1480 if (*pbs) {
1481 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1482 "another block device");
1483 return -EINVAL;
1484 }
1485
1486 if (filename || options_non_empty) {
1487 error_setg(errp, "Cannot reference an existing block device with "
1488 "additional options or a new filename");
1489 return -EINVAL;
1490 }
1491
1492 bs = bdrv_lookup_bs(reference, reference, errp);
1493 if (!bs) {
1494 return -ENODEV;
1495 }
1496 bdrv_ref(bs);
1497 *pbs = bs;
1498 return 0;
1499 }
1500
Max Reitzf67503e2014-02-18 18:33:05 +01001501 if (*pbs) {
1502 bs = *pbs;
1503 } else {
Markus Armbrustere4e99862014-10-07 13:59:03 +02001504 bs = bdrv_new();
Max Reitzf67503e2014-02-18 18:33:05 +01001505 }
1506
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001507 /* NULL means an empty set of options */
1508 if (options == NULL) {
1509 options = qdict_new();
1510 }
1511
Kevin Wolf17b005f2014-05-27 10:50:29 +02001512 ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
Kevin Wolf462f5bc2014-05-26 11:39:55 +02001513 if (local_err) {
1514 goto fail;
1515 }
1516
Kevin Wolf76c591b2014-06-04 14:19:44 +02001517 /* Find the right image format driver */
1518 drv = NULL;
1519 drvname = qdict_get_try_str(options, "driver");
1520 if (drvname) {
1521 drv = bdrv_find_format(drvname);
1522 qdict_del(options, "driver");
1523 if (!drv) {
1524 error_setg(errp, "Unknown driver: '%s'", drvname);
1525 ret = -EINVAL;
1526 goto fail;
1527 }
1528 }
1529
1530 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1531 if (drv && !drv->bdrv_file_open) {
1532 /* If the user explicitly wants a format driver here, we'll need to add
1533 * another layer for the protocol in bs->file */
1534 flags &= ~BDRV_O_PROTOCOL;
1535 }
1536
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001537 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001538 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001539
Kevin Wolff4788ad2014-06-03 16:44:19 +02001540 /* Open image file without format layer */
1541 if ((flags & BDRV_O_PROTOCOL) == 0) {
1542 if (flags & BDRV_O_RDWR) {
1543 flags |= BDRV_O_ALLOW_RDWR;
1544 }
1545 if (flags & BDRV_O_SNAPSHOT) {
1546 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1547 flags = bdrv_backing_flags(flags);
1548 }
1549
1550 assert(file == NULL);
1551 ret = bdrv_open_image(&file, filename, options, "file",
1552 bdrv_inherited_flags(flags),
1553 true, &local_err);
1554 if (ret < 0) {
Max Reitz5469a2a2014-02-18 18:33:10 +01001555 goto fail;
1556 }
1557 }
1558
Kevin Wolf76c591b2014-06-04 14:19:44 +02001559 /* Image format probing */
Kevin Wolf38f3ef52014-11-20 16:27:12 +01001560 bs->probed = !drv;
Kevin Wolf76c591b2014-06-04 14:19:44 +02001561 if (!drv && file) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001562 ret = find_image_format(file, filename, &drv, &local_err);
1563 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001564 goto fail;
Max Reitz2a05cbe2013-12-20 19:28:10 +01001565 }
Kevin Wolf76c591b2014-06-04 14:19:44 +02001566 } else if (!drv) {
Kevin Wolf17b005f2014-05-27 10:50:29 +02001567 error_setg(errp, "Must specify either driver or file");
1568 ret = -EINVAL;
Kevin Wolf8bfea152014-04-11 19:16:36 +02001569 goto fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001570 }
1571
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001572 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001573 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001574 if (ret < 0) {
Kevin Wolf8bfea152014-04-11 19:16:36 +02001575 goto fail;
Christoph Hellwig69873072010-01-20 18:13:25 +01001576 }
1577
Max Reitz2a05cbe2013-12-20 19:28:10 +01001578 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001579 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001580 file = NULL;
1581 }
1582
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001583 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001584 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001585 QDict *backing_options;
1586
Benoît Canet5726d872013-09-25 13:30:01 +02001587 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001588 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001589 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001590 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001591 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001592 }
1593
Max Reitz91af7012014-07-18 20:24:56 +02001594 bdrv_refresh_filename(bs);
1595
Kevin Wolfb9988752014-04-03 12:09:34 +02001596 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1597 * temporary snapshot afterwards. */
Kevin Wolfb1e6fc02014-05-06 12:11:42 +02001598 if (snapshot_flags) {
Chen Gang6b8aeca2014-06-23 23:28:23 +08001599 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
Kevin Wolfb9988752014-04-03 12:09:34 +02001600 if (local_err) {
Kevin Wolfb9988752014-04-03 12:09:34 +02001601 goto close_and_fail;
1602 }
1603 }
1604
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001605 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001606 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001607 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001608 if (flags & BDRV_O_PROTOCOL) {
1609 error_setg(errp, "Block protocol '%s' doesn't support the option "
1610 "'%s'", drv->format_name, entry->key);
1611 } else {
1612 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1613 "support the option '%s'", drv->format_name,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001614 bdrv_get_device_name(bs), entry->key);
Max Reitz5acd9d82014-02-18 18:33:11 +01001615 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001616
1617 ret = -EINVAL;
1618 goto close_and_fail;
1619 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001620
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001621 if (!bdrv_key_required(bs)) {
Markus Armbrustera7f53e22014-10-07 13:59:25 +02001622 if (bs->blk) {
1623 blk_dev_change_media_cb(bs->blk, true);
1624 }
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001625 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1626 && !runstate_check(RUN_STATE_INMIGRATE)
1627 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1628 error_setg(errp,
1629 "Guest must be stopped for opening of encrypted image");
1630 ret = -EBUSY;
1631 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001632 }
1633
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001634 QDECREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001635 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001636 return 0;
1637
Kevin Wolf8bfea152014-04-11 19:16:36 +02001638fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001639 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001640 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001641 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001642 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001643 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001644 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001645 if (!*pbs) {
1646 /* If *pbs is NULL, a new BDS has been created in this function and
1647 needs to be freed now. Otherwise, it does not need to be closed,
1648 since it has not really been opened yet. */
1649 bdrv_unref(bs);
1650 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001651 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001652 error_propagate(errp, local_err);
1653 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001654 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001655
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001656close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001657 /* See fail path, but now the BDS has to be always closed */
1658 if (*pbs) {
1659 bdrv_close(bs);
1660 } else {
1661 bdrv_unref(bs);
1662 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001663 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001664 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001665 error_propagate(errp, local_err);
1666 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001667 return ret;
1668}
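/* Illustrative sketch (not from the original source; the image path and
 * driver choice are made up): opening a qcow2 image read/write with an
 * explicit format driver passed via the options QDict.
 *
 *     BlockDriverState *bs = NULL;
 *     QDict *opts = qdict_new();
 *     Error *local_err = NULL;
 *     int ret;
 *
 *     qdict_put(opts, "driver", qstring_from_str("qcow2"));
 *     ret = bdrv_open(&bs, "/tmp/test.qcow2", NULL, opts,
 *                     BDRV_O_RDWR | BDRV_O_CACHE_WB, NULL, &local_err);
 *     if (ret < 0) {
 *         // opts has been consumed even on failure; only local_err remains
 *     } else {
 *         // ... use bs ...
 *         bdrv_unref(bs);   // drop the reference created by bdrv_open()
 *     }
 */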
1669
Jeff Codye971aa12012-09-20 15:13:19 -04001670typedef struct BlockReopenQueueEntry {
1671 bool prepared;
1672 BDRVReopenState state;
1673 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1674} BlockReopenQueueEntry;
1675
1676/*
1677 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1678 * reopen of multiple devices.
1679 *
 1680 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLEQ_INIT
 1681 * already performed, or it may be NULL, in which case a new BlockReopenQueue will
1682 * be created and initialized. This newly created BlockReopenQueue should be
1683 * passed back in for subsequent calls that are intended to be of the same
1684 * atomic 'set'.
1685 *
1686 * bs is the BlockDriverState to add to the reopen queue.
1687 *
1688 * flags contains the open flags for the associated bs
1689 *
1690 * returns a pointer to bs_queue, which is either the newly allocated
1691 * bs_queue, or the existing bs_queue being used.
1692 *
1693 */
1694BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1695 BlockDriverState *bs, int flags)
1696{
1697 assert(bs != NULL);
1698
1699 BlockReopenQueueEntry *bs_entry;
1700 if (bs_queue == NULL) {
1701 bs_queue = g_new0(BlockReopenQueue, 1);
1702 QSIMPLEQ_INIT(bs_queue);
1703 }
1704
Kevin Wolff1f25a22014-04-25 19:04:55 +02001705 /* bdrv_open() masks this flag out */
1706 flags &= ~BDRV_O_PROTOCOL;
1707
Jeff Codye971aa12012-09-20 15:13:19 -04001708 if (bs->file) {
Kevin Wolff1f25a22014-04-25 19:04:55 +02001709 bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
Jeff Codye971aa12012-09-20 15:13:19 -04001710 }
1711
1712 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1713 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1714
1715 bs_entry->state.bs = bs;
1716 bs_entry->state.flags = flags;
1717
1718 return bs_queue;
1719}
1720
1721/*
1722 * Reopen multiple BlockDriverStates atomically & transactionally.
1723 *
 1724 * The queue passed in (bs_queue) must have been built up previously
1725 * via bdrv_reopen_queue().
1726 *
1727 * Reopens all BDS specified in the queue, with the appropriate
1728 * flags. All devices are prepared for reopen, and failure of any
 1729 * device will cause all device changes to be abandoned, and intermediate
1730 * data cleaned up.
1731 *
1732 * If all devices prepare successfully, then the changes are committed
1733 * to all devices.
1734 *
1735 */
1736int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1737{
1738 int ret = -1;
1739 BlockReopenQueueEntry *bs_entry, *next;
1740 Error *local_err = NULL;
1741
1742 assert(bs_queue != NULL);
1743
1744 bdrv_drain_all();
1745
1746 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1747 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1748 error_propagate(errp, local_err);
1749 goto cleanup;
1750 }
1751 bs_entry->prepared = true;
1752 }
1753
1754 /* If we reach this point, we have success and just need to apply the
1755 * changes
1756 */
1757 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1758 bdrv_reopen_commit(&bs_entry->state);
1759 }
1760
1761 ret = 0;
1762
1763cleanup:
1764 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1765 if (ret && bs_entry->prepared) {
1766 bdrv_reopen_abort(&bs_entry->state);
1767 }
1768 g_free(bs_entry);
1769 }
1770 g_free(bs_queue);
1771 return ret;
1772}
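/* Illustrative sketch (not original code): building a queue that flips two
 * devices to read-only in one transaction.  bdrv_reopen_multiple() frees the
 * queue itself, so the caller only inspects the return value.
 *
 *     BlockReopenQueue *queue;
 *     Error *local_err = NULL;
 *
 *     queue = bdrv_reopen_queue(NULL, bs_a, bs_a->open_flags & ~BDRV_O_RDWR);
 *     queue = bdrv_reopen_queue(queue, bs_b, bs_b->open_flags & ~BDRV_O_RDWR);
 *     if (bdrv_reopen_multiple(queue, &local_err) < 0) {
 *         // every prepared entry was rolled back via bdrv_reopen_abort()
 *     }
 */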
1773
1774
1775/* Reopen a single BlockDriverState with the specified flags. */
1776int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1777{
1778 int ret = -1;
1779 Error *local_err = NULL;
1780 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1781
1782 ret = bdrv_reopen_multiple(queue, &local_err);
1783 if (local_err != NULL) {
1784 error_propagate(errp, local_err);
1785 }
1786 return ret;
1787}
1788
1789
1790/*
1791 * Prepares a BlockDriverState for reopen. All changes are staged in the
1792 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 1793 * the block driver's .bdrv_reopen_prepare() callback.
1794 *
1795 * bs is the BlockDriverState to reopen
1796 * flags are the new open flags
1797 * queue is the reopen queue
1798 *
1799 * Returns 0 on success, non-zero on error. On error errp will be set
1800 * as well.
1801 *
1802 * On failure, bdrv_reopen_abort() will be called to clean up any data.
 1803 * It is then the responsibility of the caller to call bdrv_reopen_abort() or
 1804 * bdrv_reopen_commit() for any other BDS that have been left in a prepare() state.
1805 *
1806 */
1807int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1808 Error **errp)
1809{
1810 int ret = -1;
1811 Error *local_err = NULL;
1812 BlockDriver *drv;
1813
1814 assert(reopen_state != NULL);
1815 assert(reopen_state->bs->drv != NULL);
1816 drv = reopen_state->bs->drv;
1817
1818 /* if we are to stay read-only, do not allow permission change
1819 * to r/w */
1820 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1821 reopen_state->flags & BDRV_O_RDWR) {
1822 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001823 bdrv_get_device_name(reopen_state->bs));
Jeff Codye971aa12012-09-20 15:13:19 -04001824 goto error;
1825 }
1826
1827
1828 ret = bdrv_flush(reopen_state->bs);
1829 if (ret) {
1830 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1831 strerror(-ret));
1832 goto error;
1833 }
1834
1835 if (drv->bdrv_reopen_prepare) {
1836 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1837 if (ret) {
1838 if (local_err != NULL) {
1839 error_propagate(errp, local_err);
1840 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001841 error_setg(errp, "failed while preparing to reopen image '%s'",
1842 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001843 }
1844 goto error;
1845 }
1846 } else {
1847 /* It is currently mandatory to have a bdrv_reopen_prepare()
1848 * handler for each supported drv. */
1849 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02001850 drv->format_name, bdrv_get_device_name(reopen_state->bs),
Jeff Codye971aa12012-09-20 15:13:19 -04001851 "reopening of file");
1852 ret = -1;
1853 goto error;
1854 }
1855
1856 ret = 0;
1857
1858error:
1859 return ret;
1860}
1861
1862/*
1863 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1864 * makes them final by swapping the staging BlockDriverState contents into
1865 * the active BlockDriverState contents.
1866 */
1867void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1868{
1869 BlockDriver *drv;
1870
1871 assert(reopen_state != NULL);
1872 drv = reopen_state->bs->drv;
1873 assert(drv != NULL);
1874
1875 /* If there are any driver level actions to take */
1876 if (drv->bdrv_reopen_commit) {
1877 drv->bdrv_reopen_commit(reopen_state);
1878 }
1879
1880 /* set BDS specific flags now */
1881 reopen_state->bs->open_flags = reopen_state->flags;
1882 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1883 BDRV_O_CACHE_WB);
1884 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001885
Kevin Wolf3baca892014-07-16 17:48:16 +02001886 bdrv_refresh_limits(reopen_state->bs, NULL);
Jeff Codye971aa12012-09-20 15:13:19 -04001887}
1888
1889/*
1890 * Abort the reopen, and delete and free the staged changes in
1891 * reopen_state
1892 */
1893void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1894{
1895 BlockDriver *drv;
1896
1897 assert(reopen_state != NULL);
1898 drv = reopen_state->bs->drv;
1899 assert(drv != NULL);
1900
1901 if (drv->bdrv_reopen_abort) {
1902 drv->bdrv_reopen_abort(reopen_state);
1903 }
1904}
1905
1906
bellardfc01f7e2003-06-30 10:03:06 +00001907void bdrv_close(BlockDriverState *bs)
1908{
Max Reitz33384422014-06-20 21:57:33 +02001909 BdrvAioNotifier *ban, *ban_next;
1910
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001911 if (bs->job) {
1912 block_job_cancel_sync(bs->job);
1913 }
Stefan Hajnoczi58fda172013-07-02 15:36:25 +02001914 bdrv_drain_all(); /* complete I/O */
1915 bdrv_flush(bs);
1916 bdrv_drain_all(); /* in case flush left pending I/O */
Paolo Bonzinid7d512f2012-08-23 11:20:36 +02001917 notifier_list_notify(&bs->close_notifiers, bs);
Kevin Wolf7094f122012-04-11 11:06:37 +02001918
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001919 if (bs->drv) {
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001920 if (bs->backing_hd) {
Fam Zheng826b6ca2014-05-23 21:29:47 +08001921 BlockDriverState *backing_hd = bs->backing_hd;
1922 bdrv_set_backing_hd(bs, NULL);
1923 bdrv_unref(backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001924 }
bellardea2384d2004-08-01 21:59:26 +00001925 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -05001926 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +00001927 bs->opaque = NULL;
1928 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001929 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +02001930 bs->backing_file[0] = '\0';
1931 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +02001932 bs->total_sectors = 0;
1933 bs->encrypted = 0;
1934 bs->valid_key = 0;
1935 bs->sg = 0;
Asias He0d51b4d2013-08-22 15:24:14 +08001936 bs->zero_beyond_eof = false;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001937 QDECREF(bs->options);
1938 bs->options = NULL;
Max Reitz91af7012014-07-18 20:24:56 +02001939 QDECREF(bs->full_open_options);
1940 bs->full_open_options = NULL;
bellardb3380822004-03-14 21:38:54 +00001941
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001942 if (bs->file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001943 bdrv_unref(bs->file);
Paolo Bonzini0ac93772012-05-08 16:51:44 +02001944 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001945 }
bellardb3380822004-03-14 21:38:54 +00001946 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001947
Markus Armbrustera7f53e22014-10-07 13:59:25 +02001948 if (bs->blk) {
1949 blk_dev_change_media_cb(bs->blk, false);
1950 }
Pavel Hrdina9ca11152012-08-09 12:44:48 +02001951
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001952 /*throttling disk I/O limits*/
1953 if (bs->io_limits_enabled) {
1954 bdrv_io_limits_disable(bs);
1955 }
Max Reitz33384422014-06-20 21:57:33 +02001956
1957 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1958 g_free(ban);
1959 }
1960 QLIST_INIT(&bs->aio_notifiers);
bellardb3380822004-03-14 21:38:54 +00001961}
1962
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001963void bdrv_close_all(void)
1964{
1965 BlockDriverState *bs;
1966
Benoît Canetdc364f42014-01-23 21:31:32 +01001967 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001968 AioContext *aio_context = bdrv_get_aio_context(bs);
1969
1970 aio_context_acquire(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001971 bdrv_close(bs);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02001972 aio_context_release(aio_context);
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001973 }
1974}
1975
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001976/* Check if any requests are in-flight (including throttled requests) */
1977static bool bdrv_requests_pending(BlockDriverState *bs)
1978{
1979 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1980 return true;
1981 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001982 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1983 return true;
1984 }
1985 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001986 return true;
1987 }
1988 if (bs->file && bdrv_requests_pending(bs->file)) {
1989 return true;
1990 }
1991 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1992 return true;
1993 }
1994 return false;
1995}
1996
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01001997static bool bdrv_drain_one(BlockDriverState *bs)
1998{
1999 bool bs_busy;
2000
2001 bdrv_flush_io_queue(bs);
2002 bdrv_start_throttled_reqs(bs);
2003 bs_busy = bdrv_requests_pending(bs);
2004 bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
2005 return bs_busy;
2006}
2007
2008/*
2009 * Wait for pending requests to complete on a single BlockDriverState subtree
2010 *
2011 * See the warning in bdrv_drain_all(). This function can only be called if
2012 * you are sure nothing can generate I/O because you have op blockers
2013 * installed.
2014 *
2015 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
2016 * AioContext.
2017 */
2018void bdrv_drain(BlockDriverState *bs)
2019{
2020 while (bdrv_drain_one(bs)) {
2021 /* Keep iterating */
2022 }
2023}
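/* Illustrative sketch (assumption, not original code): per the comment above,
 * the caller must already hold the BDS's AioContext and must have ensured
 * (e.g. via op blockers) that nothing can submit new requests meanwhile.
 *
 *     AioContext *ctx = bdrv_get_aio_context(bs);
 *
 *     aio_context_acquire(ctx);
 *     bdrv_drain(bs);              // returns once no request is in flight
 *     aio_context_release(ctx);
 */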
2024
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002025/*
2026 * Wait for pending requests to complete across all BlockDriverStates
2027 *
2028 * This function does not flush data to disk, use bdrv_flush_all() for that
2029 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02002030 *
2031 * Note that completion of an asynchronous I/O operation can trigger any
2032 * number of other I/O operations on other devices---for example a coroutine
2033 * can be arbitrarily complex and a constant flow of I/O can come until the
 2034 * coroutine is complete. Because of this, draining a single device is only
 2035 * safe when the caller guarantees that no new I/O can be generated for it;
 2036 * see bdrv_drain() above.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002036 */
2037void bdrv_drain_all(void)
2038{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02002039 /* Always run first iteration so any pending completion BHs run */
2040 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002041 BlockDriverState *bs;
2042
Fam Zheng69da3b02015-04-03 22:05:19 +08002043 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2044 AioContext *aio_context = bdrv_get_aio_context(bs);
2045
2046 aio_context_acquire(aio_context);
2047 if (bs->job) {
2048 block_job_pause(bs->job);
2049 }
2050 aio_context_release(aio_context);
2051 }
2052
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02002053 while (busy) {
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002054 busy = false;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002055
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002056 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2057 AioContext *aio_context = bdrv_get_aio_context(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002058
2059 aio_context_acquire(aio_context);
Stefan Hajnoczi5b98db02014-10-21 12:03:55 +01002060 busy |= bdrv_drain_one(bs);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002061 aio_context_release(aio_context);
Stefan Hajnoczi9b536ad2014-05-08 16:34:36 +02002062 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002063 }
Fam Zheng69da3b02015-04-03 22:05:19 +08002064
2065 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2066 AioContext *aio_context = bdrv_get_aio_context(bs);
2067
2068 aio_context_acquire(aio_context);
2069 if (bs->job) {
2070 block_job_resume(bs->job);
2071 }
2072 aio_context_release(aio_context);
2073 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00002074}
2075
Benoît Canetdc364f42014-01-23 21:31:32 +01002076/* make a BlockDriverState anonymous by removing it from the bdrv_states and
 2077 * graph_bdrv_states lists.
Ryan Harperd22b2f42011-03-29 20:51:47 -05002078 Also, clear the node_name to prevent a double remove */
2079void bdrv_make_anon(BlockDriverState *bs)
2080{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002081 /*
2082 * Take care to remove bs from bdrv_states only when it's actually
2083 * in it. Note that bs->device_list.tqe_prev is initially null,
2084 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
2085 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2086 * resetting it to null on remove.
2087 */
2088 if (bs->device_list.tqe_prev) {
Benoît Canetdc364f42014-01-23 21:31:32 +01002089 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002090 bs->device_list.tqe_prev = NULL;
Ryan Harperd22b2f42011-03-29 20:51:47 -05002091 }
Benoît Canetdc364f42014-01-23 21:31:32 +01002092 if (bs->node_name[0] != '\0') {
2093 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2094 }
2095 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05002096}
2097
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02002098static void bdrv_rebind(BlockDriverState *bs)
2099{
2100 if (bs->drv && bs->drv->bdrv_rebind) {
2101 bs->drv->bdrv_rebind(bs);
2102 }
2103}
2104
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002105static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2106 BlockDriverState *bs_src)
2107{
2108 /* move some fields that need to stay attached to the device */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002109
2110 /* dev info */
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01002111 bs_dest->guest_block_size = bs_src->guest_block_size;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002112 bs_dest->copy_on_read = bs_src->copy_on_read;
2113
2114 bs_dest->enable_write_cache = bs_src->enable_write_cache;
2115
Benoît Canetcc0681c2013-09-02 14:14:39 +02002116 /* i/o throttled req */
2117 memcpy(&bs_dest->throttle_state,
2118 &bs_src->throttle_state,
2119 sizeof(ThrottleState));
2120 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
2121 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002122 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
2123
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002124 /* r/w error */
2125 bs_dest->on_read_error = bs_src->on_read_error;
2126 bs_dest->on_write_error = bs_src->on_write_error;
2127
2128 /* i/o status */
2129 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
2130 bs_dest->iostatus = bs_src->iostatus;
2131
2132 /* dirty bitmap */
Fam Zhenge4654d22013-11-13 18:29:43 +08002133 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002134
Fam Zheng9fcb0252013-08-23 09:14:46 +08002135 /* reference count */
2136 bs_dest->refcnt = bs_src->refcnt;
2137
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002138 /* job */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002139 bs_dest->job = bs_src->job;
2140
2141 /* keep the same entry in bdrv_states */
Benoît Canetdc364f42014-01-23 21:31:32 +01002142 bs_dest->device_list = bs_src->device_list;
Markus Armbruster7e7d56d2014-10-07 13:59:05 +02002143 bs_dest->blk = bs_src->blk;
2144
Fam Zhengfbe40ff2014-05-23 21:29:42 +08002145 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2146 sizeof(bs_dest->op_blockers));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002147}
2148
2149/*
2150 * Swap bs contents for two image chains while they are live,
2151 * while keeping required fields on the BlockDriverState that is
2152 * actually attached to a device.
2153 *
2154 * This will modify the BlockDriverState fields, and swap contents
2155 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2156 *
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002157 * bs_new must not be attached to a BlockBackend.
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002158 *
2159 * This function does not create any image files.
2160 */
2161void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2162{
2163 BlockDriverState tmp;
2164
Benoît Canet90ce8a02014-03-05 23:48:29 +01002165 /* The code needs to swap the node_name but simply swapping node_list won't
2166 * work so first remove the nodes from the graph list, do the swap then
2167 * insert them back if needed.
2168 */
2169 if (bs_new->node_name[0] != '\0') {
2170 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2171 }
2172 if (bs_old->node_name[0] != '\0') {
2173 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2174 }
2175
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002176 /* bs_new must be unattached and shouldn't have anything fancy enabled */
Markus Armbruster7e7d56d2014-10-07 13:59:05 +02002177 assert(!bs_new->blk);
Fam Zhenge4654d22013-11-13 18:29:43 +08002178 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002179 assert(bs_new->job == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002180 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002181 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002182
2183 tmp = *bs_new;
2184 *bs_new = *bs_old;
2185 *bs_old = tmp;
2186
2187 /* there are some fields that should not be swapped, move them back */
2188 bdrv_move_feature_fields(&tmp, bs_old);
2189 bdrv_move_feature_fields(bs_old, bs_new);
2190 bdrv_move_feature_fields(bs_new, &tmp);
2191
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002192 /* bs_new must remain unattached */
Markus Armbruster7e7d56d2014-10-07 13:59:05 +02002193 assert(!bs_new->blk);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002194
2195 /* Check a few fields that should remain attached to the device */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002196 assert(bs_new->job == NULL);
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002197 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02002198 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002199
Benoît Canet90ce8a02014-03-05 23:48:29 +01002200 /* insert the nodes back into the graph node list if needed */
2201 if (bs_new->node_name[0] != '\0') {
2202 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2203 }
2204 if (bs_old->node_name[0] != '\0') {
2205 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2206 }
2207
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002208 bdrv_rebind(bs_new);
2209 bdrv_rebind(bs_old);
2210}
2211
Jeff Cody8802d1f2012-02-28 15:54:06 -05002212/*
2213 * Add new bs contents at the top of an image chain while the chain is
2214 * live, while keeping required fields on the top layer.
2215 *
2216 * This will modify the BlockDriverState fields, and swap contents
2217 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2218 *
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02002219 * bs_new must not be attached to a BlockBackend.
Jeff Codyf6801b82012-03-27 16:30:19 -04002220 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05002221 * This function does not create any image files.
2222 */
2223void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2224{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02002225 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002226
 2227 /* After the swap, bs_new holds what used to be the top image's contents,
 2228 * so it becomes the backing file of bs_top. */
Fam Zheng8d24cce2014-05-23 21:29:45 +08002229 bdrv_set_backing_hd(bs_top, bs_new);
Jeff Cody8802d1f2012-02-28 15:54:06 -05002230}
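/* Illustrative sketch (not from the original source): this is essentially
 * what bdrv_append_temp_snapshot() above does once the overlay is open.
 * "overlay" is a made-up name for a freshly opened image whose backing file
 * is meant to be the current contents of bs.
 *
 *     bdrv_append(overlay, bs);
 *     // bs (the pointer the device keeps) now presents the overlay's
 *     // contents, and the former top image is reachable as bs->backing_hd.
 */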
2231
Fam Zheng4f6fd342013-08-23 09:14:47 +08002232static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00002233{
Paolo Bonzini3e914652012-03-30 13:17:11 +02002234 assert(!bs->job);
Fam Zheng3718d8a2014-05-23 21:29:43 +08002235 assert(bdrv_op_blocker_is_empty(bs));
Fam Zheng4f6fd342013-08-23 09:14:47 +08002236 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08002237 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02002238
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02002239 bdrv_close(bs);
2240
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01002241 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05002242 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00002243
Anthony Liguori7267c092011-08-20 22:09:37 -05002244 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00002245}
2246
aliguorie97fc192009-04-21 23:11:50 +00002247/*
2248 * Run consistency checks on an image
2249 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002250 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002251 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002252 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002253 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002254int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002255{
Max Reitz908bcd52014-08-07 22:47:55 +02002256 if (bs->drv == NULL) {
2257 return -ENOMEDIUM;
2258 }
aliguorie97fc192009-04-21 23:11:50 +00002259 if (bs->drv->bdrv_check == NULL) {
2260 return -ENOTSUP;
2261 }
2262
Kevin Wolfe076f332010-06-29 11:43:13 +02002263 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002264 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002265}
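/* Illustrative sketch (assumption): running a report-only consistency check.
 * Passing 0 as the BdrvCheckMode requests reporting without repairs; the
 * result field names are assumed from BdrvCheckResult as used by qemu-img.
 *
 *     BdrvCheckResult result;
 *     int ret = bdrv_check(bs, &result, 0);
 *
 *     if (ret < 0) {
 *         // the check itself could not be completed
 *     } else if (result.corruptions || result.leaks) {
 *         // image is inconsistent; rerun with a fixing BdrvCheckMode if desired
 *     }
 */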
2266
Kevin Wolf8a426612010-07-16 17:17:01 +02002267#define COMMIT_BUF_SECTORS 2048
2268
bellard33e39632003-07-06 17:15:21 +00002269/* commit COW file into the raw image */
2270int bdrv_commit(BlockDriverState *bs)
2271{
bellard19cb3732006-08-19 11:45:59 +00002272 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002273 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002274 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002275 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002276 uint8_t *buf = NULL;
bellard33e39632003-07-06 17:15:21 +00002277
bellard19cb3732006-08-19 11:45:59 +00002278 if (!drv)
2279 return -ENOMEDIUM;
Liu Yuan6bb45152014-09-01 13:35:21 +08002280
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002281 if (!bs->backing_hd) {
2282 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002283 }
2284
Fam Zhengbb000212014-09-11 13:14:00 +08002285 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2286 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002287 return -EBUSY;
2288 }
2289
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002290 ro = bs->backing_hd->read_only;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002291 open_flags = bs->backing_hd->open_flags;
2292
2293 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002294 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2295 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002296 }
bellard33e39632003-07-06 17:15:21 +00002297 }
bellardea2384d2004-08-01 21:59:26 +00002298
Jeff Cody72706ea2014-01-24 09:02:35 -05002299 length = bdrv_getlength(bs);
2300 if (length < 0) {
2301 ret = length;
2302 goto ro_cleanup;
2303 }
2304
2305 backing_length = bdrv_getlength(bs->backing_hd);
2306 if (backing_length < 0) {
2307 ret = backing_length;
2308 goto ro_cleanup;
2309 }
2310
2311 /* If our top snapshot is larger than the backing file image,
2312 * grow the backing file image if possible. If not possible,
2313 * we must return an error */
2314 if (length > backing_length) {
2315 ret = bdrv_truncate(bs->backing_hd, length);
2316 if (ret < 0) {
2317 goto ro_cleanup;
2318 }
2319 }
2320
2321 total_sectors = length >> BDRV_SECTOR_BITS;
Kevin Wolf857d4f42014-05-20 13:16:51 +02002322
2323 /* qemu_try_blockalign() for bs will choose an alignment that works for
2324 * bs->backing_hd as well, so no need to compare the alignment manually. */
2325 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2326 if (buf == NULL) {
2327 ret = -ENOMEM;
2328 goto ro_cleanup;
2329 }
bellardea2384d2004-08-01 21:59:26 +00002330
Kevin Wolf8a426612010-07-16 17:17:01 +02002331 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002332 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2333 if (ret < 0) {
2334 goto ro_cleanup;
2335 }
2336 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002337 ret = bdrv_read(bs, sector, buf, n);
2338 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002339 goto ro_cleanup;
2340 }
2341
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002342 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2343 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002344 goto ro_cleanup;
2345 }
bellardea2384d2004-08-01 21:59:26 +00002346 }
2347 }
bellard95389c82005-12-18 18:28:15 +00002348
Christoph Hellwig1d449522010-01-17 12:32:30 +01002349 if (drv->bdrv_make_empty) {
2350 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002351 if (ret < 0) {
2352 goto ro_cleanup;
2353 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002354 bdrv_flush(bs);
2355 }
bellard95389c82005-12-18 18:28:15 +00002356
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002357 /*
2358 * Make sure all data we wrote to the backing device is actually
2359 * stable on disk.
2360 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002361 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002362 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002363 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002364
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002365 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002366ro_cleanup:
Kevin Wolf857d4f42014-05-20 13:16:51 +02002367 qemu_vfree(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002368
2369 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002370 /* ignoring error return here */
2371 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002372 }
2373
Christoph Hellwig1d449522010-01-17 12:32:30 +01002374 return ret;
bellard33e39632003-07-06 17:15:21 +00002375}
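/* Illustrative sketch (not original code): committing one device by hand,
 * following the same AioContext discipline as bdrv_commit_all() below.
 *
 *     AioContext *ctx = bdrv_get_aio_context(bs);
 *     int ret;
 *
 *     aio_context_acquire(ctx);
 *     ret = bdrv_commit(bs);       // -ENOTSUP if bs has no backing file
 *     aio_context_release(ctx);
 */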
2376
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002377int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002378{
2379 BlockDriverState *bs;
2380
Benoît Canetdc364f42014-01-23 21:31:32 +01002381 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002382 AioContext *aio_context = bdrv_get_aio_context(bs);
2383
2384 aio_context_acquire(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002385 if (bs->drv && bs->backing_hd) {
2386 int ret = bdrv_commit(bs);
2387 if (ret < 0) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002388 aio_context_release(aio_context);
Jeff Cody272d2d82013-02-26 09:55:48 -05002389 return ret;
2390 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002391 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02002392 aio_context_release(aio_context);
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002393 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002394 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002395}
2396
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002397/**
2398 * Remove an active request from the tracked requests list
2399 *
2400 * This function should be called when a tracked request is completing.
2401 */
2402static void tracked_request_end(BdrvTrackedRequest *req)
2403{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002404 if (req->serialising) {
2405 req->bs->serialising_in_flight--;
2406 }
2407
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002408 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002409 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002410}
2411
2412/**
2413 * Add an active request to the tracked requests list
2414 */
2415static void tracked_request_begin(BdrvTrackedRequest *req,
2416 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002417 int64_t offset,
2418 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002419{
2420 *req = (BdrvTrackedRequest){
2421 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002422 .offset = offset,
2423 .bytes = bytes,
2424 .is_write = is_write,
2425 .co = qemu_coroutine_self(),
2426 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002427 .overlap_offset = offset,
2428 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002429 };
2430
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002431 qemu_co_queue_init(&req->wait_queue);
2432
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002433 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2434}
2435
Kevin Wolfe96126f2014-02-08 10:42:18 +01002436static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002437{
Kevin Wolf73271452013-12-04 17:08:50 +01002438 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002439 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2440 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002441
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002442 if (!req->serialising) {
2443 req->bs->serialising_in_flight++;
2444 req->serialising = true;
2445 }
Kevin Wolf73271452013-12-04 17:08:50 +01002446
2447 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2448 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002449}
2450
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002451/**
2452 * Round a region to cluster boundaries
2453 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002454void bdrv_round_to_clusters(BlockDriverState *bs,
2455 int64_t sector_num, int nb_sectors,
2456 int64_t *cluster_sector_num,
2457 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002458{
2459 BlockDriverInfo bdi;
2460
2461 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2462 *cluster_sector_num = sector_num;
2463 *cluster_nb_sectors = nb_sectors;
2464 } else {
2465 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2466 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2467 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2468 nb_sectors, c);
2469 }
2470}
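/* Worked example (illustrative): with a 64k cluster size the cluster covers
 * 128 sectors, so a request for sectors [100, 164) is widened to the
 * cluster-aligned range [0, 256):
 *
 *     int64_t cluster_sector_num;
 *     int cluster_nb_sectors;
 *
 *     bdrv_round_to_clusters(bs, 100, 64,
 *                            &cluster_sector_num, &cluster_nb_sectors);
 *     // cluster_sector_num == 0, cluster_nb_sectors == 256
 */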
2471
Kevin Wolf73271452013-12-04 17:08:50 +01002472static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002473{
2474 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002475 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002476
Kevin Wolf73271452013-12-04 17:08:50 +01002477 ret = bdrv_get_info(bs, &bdi);
2478 if (ret < 0 || bdi.cluster_size == 0) {
2479 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002480 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002481 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002482 }
2483}
2484
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002485static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002486 int64_t offset, unsigned int bytes)
2487{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002488 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002489 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002490 return false;
2491 }
2492 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002493 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002494 return false;
2495 }
2496 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002497}
2498
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002499static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002500{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002501 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002502 BdrvTrackedRequest *req;
2503 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002504 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002505
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002506 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002507 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002508 }
2509
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002510 do {
2511 retry = false;
2512 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002513 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002514 continue;
2515 }
Kevin Wolf73271452013-12-04 17:08:50 +01002516 if (tracked_request_overlaps(req, self->overlap_offset,
2517 self->overlap_bytes))
2518 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002519 /* Hitting this means there was a reentrant request, for
2520 * example, a block driver issuing nested requests. This must
2521 * never happen since it means deadlock.
2522 */
2523 assert(qemu_coroutine_self() != req->co);
2524
Kevin Wolf64604402013-12-13 13:04:35 +01002525 /* If the request is already (indirectly) waiting for us, or
2526 * will wait for us as soon as it wakes up, then just go on
2527 * (instead of producing a deadlock in the former case). */
2528 if (!req->waiting_for) {
2529 self->waiting_for = req;
2530 qemu_co_queue_wait(&req->wait_queue);
2531 self->waiting_for = NULL;
2532 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002533 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002534 break;
2535 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002536 }
2537 }
2538 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002539
2540 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002541}
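/* Illustrative sketch (assumption, not original code): the typical life cycle
 * of a tracked request inside a read/write path.  Serialising against
 * overlapping requests is only needed for operations such as copy-on-read.
 *
 *     BdrvTrackedRequest req;
 *
 *     tracked_request_begin(&req, bs, offset, bytes, true);
 *     mark_request_serialising(&req, bdrv_get_cluster_size(bs));
 *     wait_serialising_requests(&req);
 *     // ... issue the actual driver I/O ...
 *     tracked_request_end(&req);
 */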
2542
Kevin Wolf756e6732010-01-12 12:55:17 +01002543/*
2544 * Return values:
2545 * 0 - success
2546 * -EINVAL - backing format specified, but no file
2547 * -ENOSPC - can't update the backing file because no space is left in the
2548 * image file header
2549 * -ENOTSUP - format driver doesn't support changing the backing file
2550 */
2551int bdrv_change_backing_file(BlockDriverState *bs,
2552 const char *backing_file, const char *backing_fmt)
2553{
2554 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002555 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002556
Paolo Bonzini5f377792012-04-12 14:01:01 +02002557 /* Backing file format doesn't make sense without a backing file */
2558 if (backing_fmt && !backing_file) {
2559 return -EINVAL;
2560 }
2561
Kevin Wolf756e6732010-01-12 12:55:17 +01002562 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002563 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002564 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002565 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002566 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002567
2568 if (ret == 0) {
2569 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2570 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2571 }
2572 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002573}
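/* Illustrative sketch (not from the original source): this mirrors how
 * bdrv_drop_intermediate() below rewrites the overlay's metadata once the
 * intermediate images are gone.  "overlay" and "base" are made-up names.
 *
 *     ret = bdrv_change_backing_file(overlay, base->filename,
 *                                    base->drv ? base->drv->format_name : "");
 *     if (ret == -ENOTSUP) {
 *         // the overlay's format driver cannot rewrite its backing file entry
 *     }
 */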
2574
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002575/*
2576 * Finds the image layer in the chain that has 'bs' as its backing file.
2577 *
2578 * active is the current topmost image.
2579 *
2580 * Returns NULL if bs is not found in active's image chain,
2581 * or if active == bs.
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002582 *
2583 * Returns the bottommost base image if bs == NULL.
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002584 */
2585BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2586 BlockDriverState *bs)
2587{
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002588 while (active && bs != active->backing_hd) {
2589 active = active->backing_hd;
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002590 }
2591
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002592 return active;
2593}
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002594
Jeff Cody4caf0fc2014-06-25 15:35:26 -04002595/* Given a BDS, searches for the base layer. */
2596BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2597{
2598 return bdrv_find_overlay(bs, NULL);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002599}
2600
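/* Illustrative usage sketch, not part of the original file: given the active
 * (topmost) image and some image in its chain, print the image sitting on top
 * of it and the bottommost base of the whole chain. */
static void example_print_chain_endpoints(BlockDriverState *active,
                                          BlockDriverState *bs)
{
    BlockDriverState *overlay = bdrv_find_overlay(active, bs);
    BlockDriverState *base = bdrv_find_base(active);

    if (overlay) {
        printf("overlay of %s: %s\n", bs->filename, overlay->filename);
    }
    if (base) {
        printf("base of the chain: %s\n", base->filename);
    }
}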
2601typedef struct BlkIntermediateStates {
2602 BlockDriverState *bs;
2603 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2604} BlkIntermediateStates;
2605
2606
2607/*
2608 * Drops images above 'base' up to and including 'top', and sets the image
2609 * above 'top' to have base as its backing file.
2610 *
2611 * Requires that the overlay to 'top' is opened r/w, so that the backing file
 2612 * information in the overlay image can be properly updated.
2613 *
2614 * E.g., this will convert the following chain:
2615 * bottom <- base <- intermediate <- top <- active
2616 *
2617 * to
2618 *
2619 * bottom <- base <- active
2620 *
2621 * It is allowed for bottom==base, in which case it converts:
2622 *
2623 * base <- intermediate <- top <- active
2624 *
2625 * to
2626 *
2627 * base <- active
2628 *
Jeff Cody54e26902014-06-25 15:40:10 -04002629 * If backing_file_str is non-NULL, it will be used when modifying top's
2630 * overlay image metadata.
2631 *
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002632 * Error conditions:
2633 * if active == top, that is considered an error
2634 *
2635 */
2636int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
Jeff Cody54e26902014-06-25 15:40:10 -04002637 BlockDriverState *base, const char *backing_file_str)
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002638{
2639 BlockDriverState *intermediate;
2640 BlockDriverState *base_bs = NULL;
2641 BlockDriverState *new_top_bs = NULL;
2642 BlkIntermediateStates *intermediate_state, *next;
2643 int ret = -EIO;
2644
2645 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2646 QSIMPLEQ_INIT(&states_to_delete);
2647
2648 if (!top->drv || !base->drv) {
2649 goto exit;
2650 }
2651
2652 new_top_bs = bdrv_find_overlay(active, top);
2653
2654 if (new_top_bs == NULL) {
2655 /* we could not find the image above 'top', this is an error */
2656 goto exit;
2657 }
2658
2659 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2660 * to do, no intermediate images */
2661 if (new_top_bs->backing_hd == base) {
2662 ret = 0;
2663 goto exit;
2664 }
2665
2666 intermediate = top;
2667
2668 /* now we will go down through the list, and add each BDS we find
2669 * into our deletion queue, until we hit the 'base'
2670 */
2671 while (intermediate) {
Markus Armbruster5839e532014-08-19 10:31:08 +02002672 intermediate_state = g_new0(BlkIntermediateStates, 1);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002673 intermediate_state->bs = intermediate;
2674 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2675
2676 if (intermediate->backing_hd == base) {
2677 base_bs = intermediate->backing_hd;
2678 break;
2679 }
2680 intermediate = intermediate->backing_hd;
2681 }
2682 if (base_bs == NULL) {
2683 /* something went wrong, we did not end at the base. safely
2684 * unravel everything, and exit with error */
2685 goto exit;
2686 }
2687
2688 /* success - we can delete the intermediate states, and link top->base */
Jeff Cody54e26902014-06-25 15:40:10 -04002689 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2690 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002691 base_bs->drv ? base_bs->drv->format_name : "");
2692 if (ret) {
2693 goto exit;
2694 }
Fam Zheng920beae2014-05-23 21:29:46 +08002695 bdrv_set_backing_hd(new_top_bs, base_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002696
2697 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2698 /* so that bdrv_close() does not recursively close the chain */
Fam Zheng920beae2014-05-23 21:29:46 +08002699 bdrv_set_backing_hd(intermediate_state->bs, NULL);
Fam Zheng4f6fd342013-08-23 09:14:47 +08002700 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002701 }
2702 ret = 0;
2703
2704exit:
2705 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2706 g_free(intermediate_state);
2707 }
2708 return ret;
2709}
2710
2711
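/* Illustrative usage sketch, not part of the original file: a commit-style
 * helper that collapses everything between 'top' and 'base'.  Passing NULL as
 * backing_file_str keeps base's filename in the overlay's metadata. */
static int example_collapse_chain(BlockDriverState *active,
                                  BlockDriverState *top,
                                  BlockDriverState *base)
{
    if (!bdrv_chain_contains(top, base)) {
        return -EINVAL; /* 'base' must be at or below 'top' in the chain */
    }
    return bdrv_drop_intermediate(active, top, base, NULL);
}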
aliguori71d07702009-03-03 17:37:16 +00002712static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2713 size_t size)
2714{
Peter Lieven75af1f32015-02-06 11:54:11 +01002715 if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
Kevin Wolf1dd3a442014-04-14 14:48:16 +02002716 return -EIO;
2717 }
2718
Max Reitzc0191e72015-02-05 13:58:24 -05002719 if (!bdrv_is_inserted(bs)) {
aliguori71d07702009-03-03 17:37:16 +00002720 return -ENOMEDIUM;
Max Reitzc0191e72015-02-05 13:58:24 -05002721 }
aliguori71d07702009-03-03 17:37:16 +00002722
Max Reitzc0191e72015-02-05 13:58:24 -05002723 if (offset < 0) {
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002724 return -EIO;
Max Reitzc0191e72015-02-05 13:58:24 -05002725 }
aliguori71d07702009-03-03 17:37:16 +00002726
2727 return 0;
2728}
2729
2730static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2731 int nb_sectors)
2732{
Peter Lieven75af1f32015-02-06 11:54:11 +01002733 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002734 return -EIO;
2735 }
2736
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002737 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2738 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002739}
2740
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002741typedef struct RwCo {
2742 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002743 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002744 QEMUIOVector *qiov;
2745 bool is_write;
2746 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002747 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002748} RwCo;
2749
2750static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2751{
2752 RwCo *rwco = opaque;
2753
2754 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002755 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2756 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002757 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002758 } else {
2759 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2760 rwco->qiov->size, rwco->qiov,
2761 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002762 }
2763}
2764
2765/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002766 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002767 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002768static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2769 QEMUIOVector *qiov, bool is_write,
2770 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002771{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002772 Coroutine *co;
2773 RwCo rwco = {
2774 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002775 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002776 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002777 .is_write = is_write,
2778 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002779 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002780 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002781
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002782 /**
2783 * In sync call context, when the vcpu is blocked, this throttling timer
2784 * will not fire; so the I/O throttling function has to be disabled here
2785 * if it has been enabled.
2786 */
2787 if (bs->io_limits_enabled) {
2788 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2789 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2790 bdrv_io_limits_disable(bs);
2791 }
2792
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002793 if (qemu_in_coroutine()) {
2794 /* Fast-path if already in coroutine context */
2795 bdrv_rw_co_entry(&rwco);
2796 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002797 AioContext *aio_context = bdrv_get_aio_context(bs);
2798
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002799 co = qemu_coroutine_create(bdrv_rw_co_entry);
2800 qemu_coroutine_enter(co, &rwco);
2801 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02002802 aio_poll(aio_context, true);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002803 }
2804 }
2805 return rwco.ret;
2806}
2807
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002808/*
2809 * Process a synchronous request using coroutines
2810 */
2811static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002812 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002813{
2814 QEMUIOVector qiov;
2815 struct iovec iov = {
2816 .iov_base = (void *)buf,
2817 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2818 };
2819
Peter Lieven75af1f32015-02-06 11:54:11 +01002820 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolfda15ee52014-04-14 15:39:36 +02002821 return -EINVAL;
2822 }
2823
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002824 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002825 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2826 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002827}
2828
bellard19cb3732006-08-19 11:45:59 +00002829/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002830int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002831 uint8_t *buf, int nb_sectors)
2832{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002833 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002834}
2835
Markus Armbruster07d27a42012-06-29 17:34:29 +02002836/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2837int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2838 uint8_t *buf, int nb_sectors)
2839{
2840 bool enabled;
2841 int ret;
2842
2843 enabled = bs->io_limits_enabled;
2844 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002845 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002846 bs->io_limits_enabled = enabled;
2847 return ret;
2848}
2849
ths5fafdf22007-09-16 21:08:06 +00002850/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002851 -EIO generic I/O error (may happen for all errors)
2852 -ENOMEDIUM No media inserted.
2853 -EINVAL Invalid sector number or nb_sectors
2854 -EACCES Trying to write a read-only device
2855*/
ths5fafdf22007-09-16 21:08:06 +00002856int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002857 const uint8_t *buf, int nb_sectors)
2858{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002859 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002860}
2861
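/* Illustrative usage sketch, not part of the original file: a synchronous
 * read-modify-write of the first sector using the wrappers above.  The helper
 * name and the single-byte patch are made up for the example. */
static int example_patch_first_sector(BlockDriverState *bs, uint8_t patch_byte)
{
    uint8_t *buf = qemu_blockalign(bs, BDRV_SECTOR_SIZE);
    int ret;

    ret = bdrv_read(bs, 0, buf, 1);     /* one 512-byte sector */
    if (ret < 0) {
        goto out;
    }
    buf[0] = patch_byte;
    ret = bdrv_write(bs, 0, buf, 1);
out:
    qemu_vfree(buf);
    return ret;
}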
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002862int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2863 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002864{
2865 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002866 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002867}
2868
Peter Lievend75cbb52013-10-24 12:07:03 +02002869/*
2870 * Completely zero out a block device with the help of bdrv_write_zeroes.
2871 * The operation is sped up by checking the block status and only writing
2872 * zeroes to the device if they currently do not return zeroes. Optional
2873 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2874 *
2875 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2876 */
2877int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2878{
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002879 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
Peter Lievend75cbb52013-10-24 12:07:03 +02002880 int n;
2881
Markus Armbrusterd32f7c12014-06-26 13:23:18 +02002882 target_sectors = bdrv_nb_sectors(bs);
2883 if (target_sectors < 0) {
2884 return target_sectors;
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002885 }
Kevin Wolf9ce10c02014-04-14 17:03:34 +02002886
Peter Lievend75cbb52013-10-24 12:07:03 +02002887 for (;;) {
Peter Lieven75af1f32015-02-06 11:54:11 +01002888 nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
Peter Lievend75cbb52013-10-24 12:07:03 +02002889 if (nb_sectors <= 0) {
2890 return 0;
2891 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002892 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002893 if (ret < 0) {
2894 error_report("error getting block status at sector %" PRId64 ": %s",
2895 sector_num, strerror(-ret));
2896 return ret;
2897 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002898 if (ret & BDRV_BLOCK_ZERO) {
2899 sector_num += n;
2900 continue;
2901 }
2902 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2903 if (ret < 0) {
2904 error_report("error writing zeroes at sector %" PRId64 ": %s",
2905 sector_num, strerror(-ret));
2906 return ret;
2907 }
2908 sector_num += n;
2909 }
2910}
2911
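/* Illustrative usage sketch, not part of the original file: zero out a whole
 * image, allowing the driver to unmap clusters where it can and falling back
 * to explicit zero writes elsewhere. */
static int example_wipe_image(BlockDriverState *bs)
{
    int ret = bdrv_make_zero(bs, BDRV_REQ_MAY_UNMAP);
    if (ret < 0) {
        error_report("wiping image failed: %s", strerror(-ret));
    }
    return ret;
}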
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002912int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002913{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002914 QEMUIOVector qiov;
2915 struct iovec iov = {
2916 .iov_base = (void *)buf,
2917 .iov_len = bytes,
2918 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002919 int ret;
bellard83f64092006-08-01 16:21:11 +00002920
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002921 if (bytes < 0) {
2922 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002923 }
2924
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002925 qemu_iovec_init_external(&qiov, &iov, 1);
2926 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2927 if (ret < 0) {
2928 return ret;
bellard83f64092006-08-01 16:21:11 +00002929 }
2930
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002931 return bytes;
bellard83f64092006-08-01 16:21:11 +00002932}
2933
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002934int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002935{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002936 int ret;
bellard83f64092006-08-01 16:21:11 +00002937
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002938 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2939 if (ret < 0) {
2940 return ret;
bellard83f64092006-08-01 16:21:11 +00002941 }
2942
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002943 return qiov->size;
2944}
2945
2946int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002947 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002948{
2949 QEMUIOVector qiov;
2950 struct iovec iov = {
2951 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002952 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002953 };
2954
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002955 if (bytes < 0) {
2956 return -EINVAL;
2957 }
2958
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002959 qemu_iovec_init_external(&qiov, &iov, 1);
2960 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002961}
bellard83f64092006-08-01 16:21:11 +00002962
Kevin Wolff08145f2010-06-16 16:38:15 +02002963/*
2964 * Writes to the file and ensures that no writes are reordered across this
2965 * request (acts as a barrier)
2966 *
2967 * Returns 0 on success, -errno in error cases.
2968 */
2969int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2970 const void *buf, int count)
2971{
2972 int ret;
2973
2974 ret = bdrv_pwrite(bs, offset, buf, count);
2975 if (ret < 0) {
2976 return ret;
2977 }
2978
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002979 /* No flush needed for cache modes that already do it */
2980 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002981 bdrv_flush(bs);
2982 }
2983
2984 return 0;
2985}
2986
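/* Illustrative usage sketch, not part of the original file: persist a small
 * (hypothetical) metadata header before any later writes can pass it, relying
 * on the barrier semantics documented above. */
static int example_update_header(BlockDriverState *bs,
                                 const void *header, size_t header_size)
{
    assert(header_size <= BDRV_SECTOR_SIZE);
    return bdrv_pwrite_sync(bs, 0, header, header_size);
}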
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002987static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002988 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2989{
2990 /* Perform I/O through a temporary buffer so that users who scribble over
2991 * their read buffer while the operation is in progress do not end up
2992 * modifying the image file. This is critical for zero-copy guest I/O
2993 * where anything might happen inside guest memory.
2994 */
2995 void *bounce_buffer;
2996
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002997 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002998 struct iovec iov;
2999 QEMUIOVector bounce_qiov;
3000 int64_t cluster_sector_num;
3001 int cluster_nb_sectors;
3002 size_t skip_bytes;
3003 int ret;
3004
 3005 /* Cover the entire cluster so that no additional backing file I/O is
 3006 * required when allocating a cluster in the image file.
 3007 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01003008 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
3009 &cluster_sector_num, &cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003010
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003011 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
3012 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003013
3014 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
Kevin Wolf857d4f42014-05-20 13:16:51 +02003015 iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
3016 if (bounce_buffer == NULL) {
3017 ret = -ENOMEM;
3018 goto err;
3019 }
3020
Stefan Hajnocziab185922011-11-17 13:40:31 +00003021 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3022
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003023 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3024 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003025 if (ret < 0) {
3026 goto err;
3027 }
3028
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003029 if (drv->bdrv_co_write_zeroes &&
3030 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01003031 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003032 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003033 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003034 /* This does not change the data on the disk, it is not necessary
3035 * to flush even in cache=writethrough mode.
3036 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003037 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00003038 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00003039 }
3040
Stefan Hajnocziab185922011-11-17 13:40:31 +00003041 if (ret < 0) {
3042 /* It might be okay to ignore write errors for guest requests. If this
3043 * is a deliberate copy-on-read then we don't want to ignore the error.
3044 * Simply report it in all cases.
3045 */
3046 goto err;
3047 }
3048
3049 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04003050 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3051 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003052
3053err:
3054 qemu_vfree(bounce_buffer);
3055 return ret;
3056}
3057
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003058/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003059 * Forwards an already correctly aligned request to the BlockDriver. This
3060 * handles copy on read and zeroing after EOF; any other features must be
3061 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003062 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003063static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003064 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01003065 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02003066{
3067 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003068 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003069
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003070 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3071 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003072
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003073 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3074 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003075 assert(!qiov || bytes == qiov->size);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003076
3077 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003078 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01003079 /* If we touch the same cluster it counts as an overlap. This
3080 * guarantees that allocating writes will be serialized and not race
3081 * with each other for the same cluster. For example, in copy-on-read
3082 * it ensures that the CoR read and write operations are atomic and
3083 * guest writes cannot interleave between them. */
3084 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003085 }
3086
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01003087 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00003088
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003089 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00003090 int pnum;
3091
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003092 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003093 if (ret < 0) {
3094 goto out;
3095 }
3096
3097 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003098 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00003099 goto out;
3100 }
3101 }
3102
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003103 /* Forward the request to the BlockDriver */
Max Reitzc0191e72015-02-05 13:58:24 -05003104 if (!bs->zero_beyond_eof) {
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003105 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3106 } else {
Max Reitzc0191e72015-02-05 13:58:24 -05003107 /* Read zeros after EOF */
Markus Armbruster40490822014-06-26 13:23:19 +02003108 int64_t total_sectors, max_nb_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003109
Markus Armbruster40490822014-06-26 13:23:19 +02003110 total_sectors = bdrv_nb_sectors(bs);
3111 if (total_sectors < 0) {
3112 ret = total_sectors;
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003113 goto out;
3114 }
3115
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01003116 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3117 align >> BDRV_SECTOR_BITS);
Paolo Bonzinie012b782014-12-17 16:09:59 +01003118 if (nb_sectors < max_nb_sectors) {
3119 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3120 } else if (max_nb_sectors > 0) {
Kevin Wolf33f461e2014-07-03 13:21:24 +02003121 QEMUIOVector local_qiov;
Kevin Wolf33f461e2014-07-03 13:21:24 +02003122
3123 qemu_iovec_init(&local_qiov, qiov->niov);
3124 qemu_iovec_concat(&local_qiov, qiov, 0,
Paolo Bonzinie012b782014-12-17 16:09:59 +01003125 max_nb_sectors * BDRV_SECTOR_SIZE);
Kevin Wolf33f461e2014-07-03 13:21:24 +02003126
Paolo Bonzinie012b782014-12-17 16:09:59 +01003127 ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
Kevin Wolf33f461e2014-07-03 13:21:24 +02003128 &local_qiov);
3129
3130 qemu_iovec_destroy(&local_qiov);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08003131 } else {
3132 ret = 0;
3133 }
3134
3135 /* Reading beyond end of file is supposed to produce zeroes */
3136 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3137 uint64_t offset = MAX(0, total_sectors - sector_num);
3138 uint64_t bytes = (sector_num + nb_sectors - offset) *
3139 BDRV_SECTOR_SIZE;
3140 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3141 }
3142 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00003143
3144out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00003145 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003146}
3147
Fam Zhengfc3959e2015-03-24 09:23:49 +08003148static inline uint64_t bdrv_get_align(BlockDriverState *bs)
3149{
3150 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3151 return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3152}
3153
3154static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
3155 int64_t offset, size_t bytes)
3156{
3157 int64_t align = bdrv_get_align(bs);
3158 return !(offset & (align - 1) || (bytes & (align - 1)));
3159}
3160
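/* Illustrative sketch, not part of the original file: show how the driver's
 * request_alignment decides which requests take the fast path and which go
 * through the RMW/padding code further down in this file. */
static void example_show_alignment(BlockDriverState *bs)
{
    uint64_t align = bdrv_get_align(bs);        /* >= BDRV_SECTOR_SIZE */
    bool sector_ok = bdrv_req_is_aligned(bs, 0, BDRV_SECTOR_SIZE);
    bool odd_ok = bdrv_req_is_aligned(bs, 1, 100);

    printf("align=%" PRIu64 ", 512B request aligned=%d, odd request aligned=%d\n",
           align, sector_ok, odd_ok);
}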
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003161/*
3162 * Handle a read request in coroutine context
3163 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003164static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
3165 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003166 BdrvRequestFlags flags)
3167{
3168 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01003169 BdrvTrackedRequest req;
3170
Fam Zhengfc3959e2015-03-24 09:23:49 +08003171 uint64_t align = bdrv_get_align(bs);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003172 uint8_t *head_buf = NULL;
3173 uint8_t *tail_buf = NULL;
3174 QEMUIOVector local_qiov;
3175 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003176 int ret;
3177
3178 if (!drv) {
3179 return -ENOMEDIUM;
3180 }
Max Reitzb9c64942015-02-05 13:58:25 -05003181
3182 ret = bdrv_check_byte_request(bs, offset, bytes);
3183 if (ret < 0) {
3184 return ret;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003185 }
3186
3187 if (bs->copy_on_read) {
3188 flags |= BDRV_REQ_COPY_ON_READ;
3189 }
3190
3191 /* throttling disk I/O */
3192 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003193 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003194 }
3195
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003196 /* Align read if necessary by padding qiov */
3197 if (offset & (align - 1)) {
3198 head_buf = qemu_blockalign(bs, align);
3199 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3200 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3201 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3202 use_local_qiov = true;
3203
3204 bytes += offset & (align - 1);
3205 offset = offset & ~(align - 1);
3206 }
3207
3208 if ((offset + bytes) & (align - 1)) {
3209 if (!use_local_qiov) {
3210 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3211 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3212 use_local_qiov = true;
3213 }
3214 tail_buf = qemu_blockalign(bs, align);
3215 qemu_iovec_add(&local_qiov, tail_buf,
3216 align - ((offset + bytes) & (align - 1)));
3217
3218 bytes = ROUND_UP(bytes, align);
3219 }
3220
Kevin Wolf65afd212013-12-03 14:55:55 +01003221 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003222 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003223 use_local_qiov ? &local_qiov : qiov,
3224 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003225 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003226
3227 if (use_local_qiov) {
3228 qemu_iovec_destroy(&local_qiov);
3229 qemu_vfree(head_buf);
3230 qemu_vfree(tail_buf);
3231 }
3232
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003233 return ret;
3234}
3235
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003236static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3237 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3238 BdrvRequestFlags flags)
3239{
Peter Lieven75af1f32015-02-06 11:54:11 +01003240 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003241 return -EINVAL;
3242 }
3243
3244 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3245 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3246}
3247
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003248int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003249 int nb_sectors, QEMUIOVector *qiov)
3250{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003251 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003252
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003253 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3254}
3255
3256int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3257 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3258{
3259 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3260
3261 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3262 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003263}
3264
Peter Lieven98764152015-02-02 15:48:34 +01003265#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
Peter Lievenc31cb702013-10-24 12:06:58 +02003266
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003267static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003268 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003269{
3270 BlockDriver *drv = bs->drv;
3271 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003272 struct iovec iov = {0};
3273 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003274
Peter Lieven75af1f32015-02-06 11:54:11 +01003275 int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
3276 BDRV_REQUEST_MAX_SECTORS);
Kevin Wolf621f0582012-03-20 15:12:58 +01003277
Peter Lievenc31cb702013-10-24 12:06:58 +02003278 while (nb_sectors > 0 && !ret) {
3279 int num = nb_sectors;
3280
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003281 /* Align request. Block drivers can expect the "bulk" of the request
3282 * to be aligned.
3283 */
3284 if (bs->bl.write_zeroes_alignment
3285 && num > bs->bl.write_zeroes_alignment) {
3286 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3287 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003288 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003289 num -= sector_num % bs->bl.write_zeroes_alignment;
3290 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3291 /* Shorten the request to the last aligned sector. num cannot
3292 * underflow because num > bs->bl.write_zeroes_alignment.
3293 */
3294 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003295 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003296 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003297
3298 /* limit request size */
3299 if (num > max_write_zeroes) {
3300 num = max_write_zeroes;
3301 }
3302
3303 ret = -ENOTSUP;
3304 /* First try the efficient write zeroes operation */
3305 if (drv->bdrv_co_write_zeroes) {
3306 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3307 }
3308
3309 if (ret == -ENOTSUP) {
3310 /* Fall back to bounce buffer if write zeroes is unsupported */
Peter Lieven095e4fa2015-01-05 12:29:49 +01003311 int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
Peter Lieven98764152015-02-02 15:48:34 +01003312 MAX_WRITE_ZEROES_BOUNCE_BUFFER);
Peter Lieven095e4fa2015-01-05 12:29:49 +01003313 num = MIN(num, max_xfer_len);
Peter Lievenc31cb702013-10-24 12:06:58 +02003314 iov.iov_len = num * BDRV_SECTOR_SIZE;
3315 if (iov.iov_base == NULL) {
Kevin Wolf857d4f42014-05-20 13:16:51 +02003316 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3317 if (iov.iov_base == NULL) {
3318 ret = -ENOMEM;
3319 goto fail;
3320 }
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003321 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003322 }
3323 qemu_iovec_init_external(&qiov, &iov, 1);
3324
3325 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003326
3327 /* Keep bounce buffer around if it is big enough for all
3328 * all future requests.
3329 */
Peter Lieven095e4fa2015-01-05 12:29:49 +01003330 if (num < max_xfer_len) {
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003331 qemu_vfree(iov.iov_base);
3332 iov.iov_base = NULL;
3333 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003334 }
3335
3336 sector_num += num;
3337 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003338 }
3339
Kevin Wolf857d4f42014-05-20 13:16:51 +02003340fail:
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003341 qemu_vfree(iov.iov_base);
3342 return ret;
3343}
3344
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003345/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003346 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003347 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003348static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003349 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3350 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003351{
3352 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003353 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003354 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003355
Kevin Wolfb404f722013-12-03 14:02:23 +01003356 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3357 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003358
Kevin Wolfb404f722013-12-03 14:02:23 +01003359 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3360 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Kevin Wolf8eb029c2014-07-01 16:09:54 +02003361 assert(!qiov || bytes == qiov->size);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003362
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003363 waited = wait_serialising_requests(req);
3364 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003365 assert(req->overlap_offset <= offset);
3366 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003367
Kevin Wolf65afd212013-12-03 14:55:55 +01003368 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003369
Peter Lieven465bee12014-05-18 00:58:19 +02003370 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3371 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3372 qemu_iovec_is_zero(qiov)) {
3373 flags |= BDRV_REQ_ZERO_WRITE;
3374 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3375 flags |= BDRV_REQ_MAY_UNMAP;
3376 }
3377 }
3378
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003379 if (ret < 0) {
3380 /* Do nothing, write notifier decided to fail this request */
3381 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003382 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003383 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003384 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003385 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003386 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3387 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003388 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003389
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003390 if (ret == 0 && !bs->enable_write_cache) {
3391 ret = bdrv_co_flush(bs);
3392 }
3393
Fam Zhenge4654d22013-11-13 18:29:43 +08003394 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003395
Benoît Canet5366d0c2014-09-05 15:46:18 +02003396 block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
Benoît Canet5e5a94b2014-09-05 15:46:16 +02003397
Max Reitzc0191e72015-02-05 13:58:24 -05003398 if (ret >= 0) {
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003399 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3400 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003401
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003402 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003403}
3404
Kevin Wolfb404f722013-12-03 14:02:23 +01003405/*
3406 * Handle a write request in coroutine context
3407 */
Kevin Wolf66015532013-12-03 14:40:18 +01003408static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3409 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003410 BdrvRequestFlags flags)
3411{
Kevin Wolf65afd212013-12-03 14:55:55 +01003412 BdrvTrackedRequest req;
Fam Zhengfc3959e2015-03-24 09:23:49 +08003413 uint64_t align = bdrv_get_align(bs);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003414 uint8_t *head_buf = NULL;
3415 uint8_t *tail_buf = NULL;
3416 QEMUIOVector local_qiov;
3417 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003418 int ret;
3419
3420 if (!bs->drv) {
3421 return -ENOMEDIUM;
3422 }
3423 if (bs->read_only) {
3424 return -EACCES;
3425 }
Max Reitzb9c64942015-02-05 13:58:25 -05003426
3427 ret = bdrv_check_byte_request(bs, offset, bytes);
3428 if (ret < 0) {
3429 return ret;
Kevin Wolfb404f722013-12-03 14:02:23 +01003430 }
3431
Kevin Wolfb404f722013-12-03 14:02:23 +01003432 /* throttling disk I/O */
3433 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003434 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003435 }
3436
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003437 /*
3438 * Align write if necessary by performing a read-modify-write cycle.
3439 * Pad qiov with the read parts and be sure to have a tracked request not
3440 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3441 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003442 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003443
3444 if (offset & (align - 1)) {
3445 QEMUIOVector head_qiov;
3446 struct iovec head_iov;
3447
3448 mark_request_serialising(&req, align);
3449 wait_serialising_requests(&req);
3450
3451 head_buf = qemu_blockalign(bs, align);
3452 head_iov = (struct iovec) {
3453 .iov_base = head_buf,
3454 .iov_len = align,
3455 };
3456 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3457
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003458 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003459 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3460 align, &head_qiov, 0);
3461 if (ret < 0) {
3462 goto fail;
3463 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003464 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003465
3466 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3467 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3468 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3469 use_local_qiov = true;
3470
3471 bytes += offset & (align - 1);
3472 offset = offset & ~(align - 1);
3473 }
3474
3475 if ((offset + bytes) & (align - 1)) {
3476 QEMUIOVector tail_qiov;
3477 struct iovec tail_iov;
3478 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003479 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003480
3481 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003482 waited = wait_serialising_requests(&req);
3483 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003484
3485 tail_buf = qemu_blockalign(bs, align);
3486 tail_iov = (struct iovec) {
3487 .iov_base = tail_buf,
3488 .iov_len = align,
3489 };
3490 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3491
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003492 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003493 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3494 align, &tail_qiov, 0);
3495 if (ret < 0) {
3496 goto fail;
3497 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003498 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003499
3500 if (!use_local_qiov) {
3501 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3502 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3503 use_local_qiov = true;
3504 }
3505
3506 tail_bytes = (offset + bytes) & (align - 1);
3507 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3508
3509 bytes = ROUND_UP(bytes, align);
3510 }
3511
Fam Zhengfc3959e2015-03-24 09:23:49 +08003512 if (use_local_qiov) {
3513 /* Local buffer may have non-zero data. */
3514 flags &= ~BDRV_REQ_ZERO_WRITE;
3515 }
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003516 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3517 use_local_qiov ? &local_qiov : qiov,
3518 flags);
3519
3520fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003521 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003522
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003523 if (use_local_qiov) {
3524 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003525 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003526 qemu_vfree(head_buf);
3527 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003528
Kevin Wolfb404f722013-12-03 14:02:23 +01003529 return ret;
3530}
3531
Kevin Wolf66015532013-12-03 14:40:18 +01003532static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3533 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3534 BdrvRequestFlags flags)
3535{
Peter Lieven75af1f32015-02-06 11:54:11 +01003536 if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
Kevin Wolf66015532013-12-03 14:40:18 +01003537 return -EINVAL;
3538 }
3539
3540 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3541 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3542}
3543
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003544int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3545 int nb_sectors, QEMUIOVector *qiov)
3546{
3547 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3548
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003549 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3550}
3551
3552int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003553 int64_t sector_num, int nb_sectors,
3554 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003555{
Fam Zhengfc3959e2015-03-24 09:23:49 +08003556 int ret;
3557
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003558 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003559
Peter Lievend32f35c2013-10-24 12:06:52 +02003560 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3561 flags &= ~BDRV_REQ_MAY_UNMAP;
3562 }
Fam Zhengfc3959e2015-03-24 09:23:49 +08003563 if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
3564 nb_sectors << BDRV_SECTOR_BITS)) {
3565 ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3566 BDRV_REQ_ZERO_WRITE | flags);
3567 } else {
3568 uint8_t *buf;
3569 QEMUIOVector local_qiov;
3570 size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
Peter Lievend32f35c2013-10-24 12:06:52 +02003571
Fam Zhengfc3959e2015-03-24 09:23:49 +08003572 buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
3573 memset(buf, 0, bytes);
3574 qemu_iovec_init(&local_qiov, 1);
3575 qemu_iovec_add(&local_qiov, buf, bytes);
3576
3577 ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
3578 BDRV_REQ_ZERO_WRITE | flags);
3579 qemu_vfree(buf);
3580 }
3581 return ret;
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003582}
3583
bellard83f64092006-08-01 16:21:11 +00003584/**
bellard83f64092006-08-01 16:21:11 +00003585 * Truncate file to 'offset' bytes (needed only for file protocols)
3586 */
3587int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3588{
3589 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003590 int ret;
bellard83f64092006-08-01 16:21:11 +00003591 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003592 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003593 if (!drv->bdrv_truncate)
3594 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003595 if (bs->read_only)
3596 return -EACCES;
Jeff Cody9c75e162014-06-25 16:55:30 -04003597
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003598 ret = drv->bdrv_truncate(bs, offset);
3599 if (ret == 0) {
3600 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003601 if (bs->blk) {
3602 blk_dev_resize_cb(bs->blk);
3603 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003604 }
3605 return ret;
bellard83f64092006-08-01 16:21:11 +00003606}
3607
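/* Illustrative usage sketch, not part of the original file: grow an image by
 * one megabyte and return the new length in bytes.  Only works for drivers
 * that implement bdrv_truncate and for writable images. */
static int64_t example_grow_by_1mb(BlockDriverState *bs)
{
    int64_t len = bdrv_getlength(bs);
    int ret;

    if (len < 0) {
        return len;
    }
    ret = bdrv_truncate(bs, len + 1024 * 1024);
    if (ret < 0) {
        return ret;
    }
    return bdrv_getlength(bs);
}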
3608/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003609 * Length of an allocated file in bytes. Sparse files are counted by actual
3610 * allocated space. Return < 0 if error or unknown.
3611 */
3612int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3613{
3614 BlockDriver *drv = bs->drv;
3615 if (!drv) {
3616 return -ENOMEDIUM;
3617 }
3618 if (drv->bdrv_get_allocated_file_size) {
3619 return drv->bdrv_get_allocated_file_size(bs);
3620 }
3621 if (bs->file) {
3622 return bdrv_get_allocated_file_size(bs->file);
3623 }
3624 return -ENOTSUP;
3625}
3626
3627/**
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003628 * Return number of sectors on success, -errno on error.
bellard83f64092006-08-01 16:21:11 +00003629 */
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003630int64_t bdrv_nb_sectors(BlockDriverState *bs)
bellard83f64092006-08-01 16:21:11 +00003631{
3632 BlockDriver *drv = bs->drv;
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003633
bellard83f64092006-08-01 16:21:11 +00003634 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003635 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003636
Kevin Wolfb94a2612013-10-29 12:18:58 +01003637 if (drv->has_variable_length) {
3638 int ret = refresh_total_sectors(bs, bs->total_sectors);
3639 if (ret < 0) {
3640 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003641 }
bellard83f64092006-08-01 16:21:11 +00003642 }
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003643 return bs->total_sectors;
3644}
3645
3646/**
3647 * Return length in bytes on success, -errno on error.
3648 * The length is always a multiple of BDRV_SECTOR_SIZE.
3649 */
3650int64_t bdrv_getlength(BlockDriverState *bs)
3651{
3652 int64_t ret = bdrv_nb_sectors(bs);
3653
3654 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003655}
3656
bellard19cb3732006-08-19 11:45:59 +00003657/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003658void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003659{
Markus Armbruster65a9bb22014-06-26 13:23:17 +02003660 int64_t nb_sectors = bdrv_nb_sectors(bs);
3661
3662 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
bellardfc01f7e2003-06-30 10:03:06 +00003663}
bellardcf989512004-02-16 21:56:36 +00003664
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003665void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3666 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003667{
3668 bs->on_read_error = on_read_error;
3669 bs->on_write_error = on_write_error;
3670}
3671
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003672BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003673{
3674 return is_read ? bs->on_read_error : bs->on_write_error;
3675}
3676
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003677BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3678{
3679 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3680
3681 switch (on_err) {
3682 case BLOCKDEV_ON_ERROR_ENOSPC:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003683 return (error == ENOSPC) ?
3684 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003685 case BLOCKDEV_ON_ERROR_STOP:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003686 return BLOCK_ERROR_ACTION_STOP;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003687 case BLOCKDEV_ON_ERROR_REPORT:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003688 return BLOCK_ERROR_ACTION_REPORT;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003689 case BLOCKDEV_ON_ERROR_IGNORE:
Wenchao Xiaa5895692014-06-18 08:43:30 +02003690 return BLOCK_ERROR_ACTION_IGNORE;
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003691 default:
3692 abort();
3693 }
3694}
3695
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003696static void send_qmp_error_event(BlockDriverState *bs,
3697 BlockErrorAction action,
3698 bool is_read, int error)
3699{
Peter Maydell573742a2014-10-10 20:33:03 +01003700 IoOperationType optype;
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003701
Peter Maydell573742a2014-10-10 20:33:03 +01003702 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3703 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003704 bdrv_iostatus_is_enabled(bs),
Luiz Capitulino624ff572014-09-11 10:25:48 -04003705 error == ENOSPC, strerror(error),
3706 &error_abort);
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003707}
3708
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003709/* This is done by device models because, while the block layer knows
3710 * about the error, it does not know whether an operation comes from
3711 * the device or the block layer (from a job, for example).
3712 */
3713void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3714 bool is_read, int error)
3715{
3716 assert(error >= 0);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003717
Wenchao Xiaa5895692014-06-18 08:43:30 +02003718 if (action == BLOCK_ERROR_ACTION_STOP) {
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003719 /* First set the iostatus, so that "info block" returns an iostatus
3720 * that matches the events raised so far (an additional error iostatus
3721 * is fine, but not a lost one).
3722 */
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003723 bdrv_iostatus_set_err(bs, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003724
3725 /* Then raise the request to stop the VM and the event.
3726 * qemu_system_vmstop_request_prepare has two effects. First,
3727 * it ensures that the STOP event always comes after the
3728 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3729 * can observe the STOP event and do a "cont" before the STOP
3730 * event is issued, the VM will not stop. In this case, vm_start()
3731 * also ensures that the STOP/RESUME pair of events is emitted.
3732 */
3733 qemu_system_vmstop_request_prepare();
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003734 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini2bd3bce2014-06-05 14:53:59 +02003735 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
3736 } else {
Luiz Capitulinoc7c2ff02014-08-29 16:07:27 -04003737 send_qmp_error_event(bs, action, is_read, error);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003738 }
3739}
3740
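/* Illustrative sketch, not part of the original file: how a device model
 * might react to a failed write, mapping the configured rerror/werror policy
 * to an action and then letting the core emit the event and stop the VM if
 * needed. */
static void example_handle_write_error(BlockDriverState *bs, int error)
{
    BlockErrorAction action = bdrv_get_error_action(bs, false, error);

    bdrv_error_action(bs, action, false, error);

    if (action == BLOCK_ERROR_ACTION_IGNORE) {
        /* complete the request towards the guest as if it had succeeded */
    }
}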
bellardb3380822004-03-14 21:38:54 +00003741int bdrv_is_read_only(BlockDriverState *bs)
3742{
3743 return bs->read_only;
3744}
3745
ths985a03b2007-12-24 16:10:43 +00003746int bdrv_is_sg(BlockDriverState *bs)
3747{
3748 return bs->sg;
3749}
3750
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003751int bdrv_enable_write_cache(BlockDriverState *bs)
3752{
3753 return bs->enable_write_cache;
3754}
3755
Paolo Bonzini425b0142012-06-06 00:04:52 +02003756void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3757{
3758 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003759
3760 /* so a reopen() will preserve wce */
3761 if (wce) {
3762 bs->open_flags |= BDRV_O_CACHE_WB;
3763 } else {
3764 bs->open_flags &= ~BDRV_O_CACHE_WB;
3765 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003766}
3767
bellardea2384d2004-08-01 21:59:26 +00003768int bdrv_is_encrypted(BlockDriverState *bs)
3769{
3770 if (bs->backing_hd && bs->backing_hd->encrypted)
3771 return 1;
3772 return bs->encrypted;
3773}
3774
aliguoric0f4ce72009-03-05 23:01:01 +00003775int bdrv_key_required(BlockDriverState *bs)
3776{
3777 BlockDriverState *backing_hd = bs->backing_hd;
3778
3779 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3780 return 1;
3781 return (bs->encrypted && !bs->valid_key);
3782}
3783
bellardea2384d2004-08-01 21:59:26 +00003784int bdrv_set_key(BlockDriverState *bs, const char *key)
3785{
3786 int ret;
3787 if (bs->backing_hd && bs->backing_hd->encrypted) {
3788 ret = bdrv_set_key(bs->backing_hd, key);
3789 if (ret < 0)
3790 return ret;
3791 if (!bs->encrypted)
3792 return 0;
3793 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003794 if (!bs->encrypted) {
3795 return -EINVAL;
3796 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3797 return -ENOMEDIUM;
3798 }
aliguoric0f4ce72009-03-05 23:01:01 +00003799 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003800 if (ret < 0) {
3801 bs->valid_key = 0;
3802 } else if (!bs->valid_key) {
3803 bs->valid_key = 1;
Markus Armbrustera7f53e22014-10-07 13:59:25 +02003804 if (bs->blk) {
3805 /* call the change callback now, we skipped it on open */
3806 blk_dev_change_media_cb(bs->blk, true);
3807 }
aliguoribb5fc202009-03-05 23:01:15 +00003808 }
aliguoric0f4ce72009-03-05 23:01:01 +00003809 return ret;
bellardea2384d2004-08-01 21:59:26 +00003810}
3811
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003812/*
3813 * Provide an encryption key for @bs.
3814 * If @key is non-null:
3815 * If @bs is not encrypted, fail.
3816 * Else if the key is invalid, fail.
3817 * Else set @bs's key to @key, replacing the existing key, if any.
3818 * If @key is null:
3819 * If @bs is encrypted and still lacks a key, fail.
3820 * Else do nothing.
3821 * On failure, store an error object through @errp if non-null.
3822 */
3823void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
3824{
3825 if (key) {
3826 if (!bdrv_is_encrypted(bs)) {
Markus Armbrusterb1ca6392015-01-29 10:37:01 +01003827 error_setg(errp, "Device '%s' is not encrypted",
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003828 bdrv_get_device_name(bs));
3829 } else if (bdrv_set_key(bs, key) < 0) {
3830 error_set(errp, QERR_INVALID_PASSWORD);
3831 }
3832 } else {
3833 if (bdrv_key_required(bs)) {
Markus Armbrusterb1ca6392015-01-29 10:37:01 +01003834 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
3835 "'%s' (%s) is encrypted",
Markus Armbruster4d2855a2015-01-29 10:37:00 +01003836 bdrv_get_device_name(bs),
3837 bdrv_get_encrypted_filename(bs));
3838 }
3839 }
3840}
3841
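/* Illustrative usage sketch, not part of the original file: supply a key for
 * an encrypted image and turn a failure into a plain error message.  The key
 * string is assumed to come from the monitor or the command line. */
static int example_unlock_image(BlockDriverState *bs, const char *key)
{
    Error *local_err = NULL;

    bdrv_add_key(bs, key, &local_err);
    if (local_err) {
        error_report("%s", error_get_pretty(local_err));
        error_free(local_err);
        return -EINVAL;
    }
    return 0;
}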
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003842const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003843{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003844 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003845}
3846
Stefan Hajnocziada42402014-08-27 12:08:55 +01003847static int qsort_strcmp(const void *a, const void *b)
3848{
3849 return strcmp(a, b);
3850}
3851
ths5fafdf22007-09-16 21:08:06 +00003852void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003853 void *opaque)
3854{
3855 BlockDriver *drv;
Jeff Codye855e4f2014-04-28 18:29:54 -04003856 int count = 0;
Stefan Hajnocziada42402014-08-27 12:08:55 +01003857 int i;
Jeff Codye855e4f2014-04-28 18:29:54 -04003858 const char **formats = NULL;
bellardea2384d2004-08-01 21:59:26 +00003859
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003860 QLIST_FOREACH(drv, &bdrv_drivers, list) {
Jeff Codye855e4f2014-04-28 18:29:54 -04003861 if (drv->format_name) {
3862 bool found = false;
3863 int i = count;
3864 while (formats && i && !found) {
3865 found = !strcmp(formats[--i], drv->format_name);
3866 }
3867
3868 if (!found) {
Markus Armbruster5839e532014-08-19 10:31:08 +02003869 formats = g_renew(const char *, formats, count + 1);
Jeff Codye855e4f2014-04-28 18:29:54 -04003870 formats[count++] = drv->format_name;
Jeff Codye855e4f2014-04-28 18:29:54 -04003871 }
3872 }
bellardea2384d2004-08-01 21:59:26 +00003873 }
Stefan Hajnocziada42402014-08-27 12:08:55 +01003874
3875 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3876
3877 for (i = 0; i < count; i++) {
3878 it(opaque, formats[i]);
3879 }
3880
Jeff Codye855e4f2014-04-28 18:29:54 -04003881 g_free(formats);
bellardea2384d2004-08-01 21:59:26 +00003882}
3883
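/* Illustrative usage sketch, not part of the original file: list every
 * registered format name; bdrv_iterate_format() hands them back sorted and
 * de-duplicated, as implemented above. */
static void example_print_format(void *opaque, const char *name)
{
    printf("  %s\n", name);
}

static void example_list_formats(void)
{
    printf("Supported formats:\n");
    bdrv_iterate_format(example_print_format, NULL);
}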
Benoît Canetdc364f42014-01-23 21:31:32 +01003884/* This function is to find a node in the bs graph */
3885BlockDriverState *bdrv_find_node(const char *node_name)
3886{
3887 BlockDriverState *bs;
3888
3889 assert(node_name);
3890
3891 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3892 if (!strcmp(node_name, bs->node_name)) {
3893 return bs;
3894 }
3895 }
3896 return NULL;
3897}
3898
Benoît Canetc13163f2014-01-23 21:31:34 +01003899/* Put this QMP function here so it can access the static graph_bdrv_states. */
3900BlockDeviceInfoList *bdrv_named_nodes_list(void)
3901{
3902 BlockDeviceInfoList *list, *entry;
3903 BlockDriverState *bs;
3904
3905 list = NULL;
3906 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3907 entry = g_malloc0(sizeof(*entry));
3908 entry->value = bdrv_block_device_info(bs);
3909 entry->next = list;
3910 list = entry;
3911 }
3912
3913 return list;
3914}
3915
Benoît Canet12d3ba82014-01-23 21:31:35 +01003916BlockDriverState *bdrv_lookup_bs(const char *device,
3917 const char *node_name,
3918 Error **errp)
3919{
Markus Armbruster7f06d472014-10-07 13:59:12 +02003920 BlockBackend *blk;
3921 BlockDriverState *bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003922
Benoît Canet12d3ba82014-01-23 21:31:35 +01003923 if (device) {
Markus Armbruster7f06d472014-10-07 13:59:12 +02003924 blk = blk_by_name(device);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003925
Markus Armbruster7f06d472014-10-07 13:59:12 +02003926 if (blk) {
3927 return blk_bs(blk);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003928 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003929 }
3930
Benoît Canetdd67fa52014-02-12 17:15:06 +01003931 if (node_name) {
3932 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003933
Benoît Canetdd67fa52014-02-12 17:15:06 +01003934 if (bs) {
3935 return bs;
3936 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003937 }
3938
Benoît Canetdd67fa52014-02-12 17:15:06 +01003939 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3940 device ? device : "",
3941 node_name ? node_name : "");
3942 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003943}
3944
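/*
 * Hypothetical usage sketch (not from the actual QEMU tree): resolving a
 * user-supplied reference that may name either a BlockBackend or a graph
 * node, the way QMP commands commonly do.  example_resolve is an
 * illustrative name.
 */
static BlockDriverState *example_resolve(const char *ref, Error **errp)
{
    /* Passing the same string twice makes bdrv_lookup_bs() try the device
     * (BlockBackend) namespace first and fall back to node names. */
    return bdrv_lookup_bs(ref, ref, errp);
}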
Jeff Cody5a6684d2014-06-25 15:40:09 -04003945/* If 'base' is in the same chain as 'top', return true. Otherwise,
3946 * return false. If either argument is NULL, return false. */
3947bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3948{
3949 while (top && top != base) {
3950 top = top->backing_hd;
3951 }
3952
3953 return top != NULL;
3954}
3955
Fam Zheng04df7652014-10-31 11:32:54 +08003956BlockDriverState *bdrv_next_node(BlockDriverState *bs)
3957{
3958 if (!bs) {
3959 return QTAILQ_FIRST(&graph_bdrv_states);
3960 }
3961 return QTAILQ_NEXT(bs, node_list);
3962}
3963
Markus Armbruster2f399b02010-06-02 18:55:20 +02003964BlockDriverState *bdrv_next(BlockDriverState *bs)
3965{
3966 if (!bs) {
3967 return QTAILQ_FIRST(&bdrv_states);
3968 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003969 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003970}
3971
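/*
 * Hypothetical usage sketch (not from the actual QEMU tree): walking all
 * device-attached BlockDriverStates with bdrv_next().  example_print_devices
 * is an illustrative name.
 */
static void example_print_devices(void)
{
    BlockDriverState *bs = NULL;

    while ((bs = bdrv_next(bs)) != NULL) {
        fprintf(stderr, "%s (node: %s)\n",
                bdrv_get_device_name(bs), bdrv_get_node_name(bs));
    }
}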
Fam Zheng20a9e772014-10-31 11:32:55 +08003972const char *bdrv_get_node_name(const BlockDriverState *bs)
3973{
3974 return bs->node_name;
3975}
3976
Markus Armbruster7f06d472014-10-07 13:59:12 +02003977/* TODO check what callers really want: bs->node_name or blk_name() */
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003978const char *bdrv_get_device_name(const BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003979{
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02003980 return bs->blk ? blk_name(bs->blk) : "";
bellardea2384d2004-08-01 21:59:26 +00003981}
3982
Markus Armbrusterc8433282012-06-05 16:49:24 +02003983int bdrv_get_flags(BlockDriverState *bs)
3984{
3985 return bs->open_flags;
3986}
3987
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003988int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003989{
3990 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003991 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003992
Benoît Canetdc364f42014-01-23 21:31:32 +01003993 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02003994 AioContext *aio_context = bdrv_get_aio_context(bs);
3995 int ret;
3996
3997 aio_context_acquire(aio_context);
3998 ret = bdrv_flush(bs);
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003999 if (ret < 0 && !result) {
4000 result = ret;
4001 }
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02004002 aio_context_release(aio_context);
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01004003 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02004004
4005 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00004006}
4007
Peter Lieven3ac21622013-06-28 12:47:42 +02004008int bdrv_has_zero_init_1(BlockDriverState *bs)
4009{
4010 return 1;
4011}
4012
Kevin Wolff2feebb2010-04-14 17:30:35 +02004013int bdrv_has_zero_init(BlockDriverState *bs)
4014{
4015 assert(bs->drv);
4016
Paolo Bonzini11212d82013-09-04 19:00:27 +02004017 /* If BS is a copy-on-write image, it is initialized to
4018 the contents of the base image, which may not be zeroes. */
4019 if (bs->backing_hd) {
4020 return 0;
4021 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02004022 if (bs->drv->bdrv_has_zero_init) {
4023 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02004024 }
4025
Peter Lieven3ac21622013-06-28 12:47:42 +02004026 /* safe default */
4027 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02004028}
4029
Peter Lieven4ce78692013-10-24 12:06:54 +02004030bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
4031{
4032 BlockDriverInfo bdi;
4033
4034 if (bs->backing_hd) {
4035 return false;
4036 }
4037
4038 if (bdrv_get_info(bs, &bdi) == 0) {
4039 return bdi.unallocated_blocks_are_zero;
4040 }
4041
4042 return false;
4043}
4044
4045bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
4046{
4047 BlockDriverInfo bdi;
4048
4049 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
4050 return false;
4051 }
4052
4053 if (bdrv_get_info(bs, &bdi) == 0) {
4054 return bdi.can_write_zeroes_with_unmap;
4055 }
4056
4057 return false;
4058}
4059
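/*
 * Hypothetical usage sketch (not from the actual QEMU tree): deciding whether
 * a freshly created copy target must be explicitly zero-filled before data is
 * copied into it, similar in spirit to a decision qemu-img convert has to
 * make.  example_target_needs_zeroing is an illustrative name.
 */
static bool example_target_needs_zeroing(BlockDriverState *target)
{
    /* A just-created image whose driver guarantees zero initialization, or
     * one whose unallocated blocks read back as zeroes, needs no pre-fill. */
    if (bdrv_has_zero_init(target) ||
        bdrv_unallocated_blocks_are_zero(target)) {
        return false;
    }
    return true;
}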
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004060typedef struct BdrvCoGetBlockStatusData {
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004061 BlockDriverState *bs;
Miroslav Rezaninab35b2bb2013-02-13 09:09:39 +01004062 BlockDriverState *base;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004063 int64_t sector_num;
4064 int nb_sectors;
4065 int *pnum;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004066 int64_t ret;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004067 bool done;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004068} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00004069
thsf58c7b32008-06-05 21:53:49 +00004070/*
Fam Zheng705be722014-11-10 17:10:38 +08004071 * Returns the allocation status of the specified sectors.
4072 * Drivers not implementing the functionality are assumed to not support
4073 * backing files, hence all their sectors are reported as allocated.
thsf58c7b32008-06-05 21:53:49 +00004074 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004075 * If 'sector_num' is beyond the end of the disk image the return value is 0
4076 * and 'pnum' is set to 0.
4077 *
thsf58c7b32008-06-05 21:53:49 +00004078 * 'pnum' is set to the number of sectors (including and immediately following
4079 * the specified sector) that are known to be in the same
4080 * allocated/unallocated state.
4081 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004082 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
4083 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00004084 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004085static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4086 int64_t sector_num,
4087 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00004088{
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004089 int64_t total_sectors;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004090 int64_t n;
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004091 int64_t ret, ret2;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004092
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004093 total_sectors = bdrv_nb_sectors(bs);
4094 if (total_sectors < 0) {
4095 return total_sectors;
Paolo Bonzini617ccb42013-09-04 19:00:23 +02004096 }
4097
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004098 if (sector_num >= total_sectors) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004099 *pnum = 0;
4100 return 0;
4101 }
4102
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004103 n = total_sectors - sector_num;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004104 if (n < nb_sectors) {
4105 nb_sectors = n;
4106 }
4107
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004108 if (!bs->drv->bdrv_co_get_block_status) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00004109 *pnum = nb_sectors;
Kevin Wolfe88ae222014-05-06 15:25:36 +02004110 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
Paolo Bonzini918e92d2013-09-04 19:00:37 +02004111 if (bs->drv->protocol_name) {
4112 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4113 }
4114 return ret;
thsf58c7b32008-06-05 21:53:49 +00004115 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004116
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004117 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4118 if (ret < 0) {
Peter Lieven3e0a2332013-09-24 15:35:08 +02004119 *pnum = 0;
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004120 return ret;
4121 }
4122
Peter Lieven92bc50a2013-10-08 14:43:14 +02004123 if (ret & BDRV_BLOCK_RAW) {
4124 assert(ret & BDRV_BLOCK_OFFSET_VALID);
4125 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4126 *pnum, pnum);
4127 }
4128
Kevin Wolfe88ae222014-05-06 15:25:36 +02004129 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4130 ret |= BDRV_BLOCK_ALLOCATED;
4131 }
4132
Peter Lievenc3d86882013-10-24 12:07:04 +02004133 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4134 if (bdrv_unallocated_blocks_are_zero(bs)) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004135 ret |= BDRV_BLOCK_ZERO;
Peter Lieven1f9db222013-09-24 15:35:09 +02004136 } else if (bs->backing_hd) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004137 BlockDriverState *bs2 = bs->backing_hd;
Markus Armbruster30a7f2f2014-06-26 13:23:20 +02004138 int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
4139 if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02004140 ret |= BDRV_BLOCK_ZERO;
4141 }
4142 }
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004143 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004144
4145 if (bs->file &&
4146 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
4147 (ret & BDRV_BLOCK_OFFSET_VALID)) {
Max Reitz59c9a952014-10-22 17:00:15 +02004148 int file_pnum;
4149
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004150 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
Max Reitz59c9a952014-10-22 17:00:15 +02004151 *pnum, &file_pnum);
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004152 if (ret2 >= 0) {
 4153 /* Ignore errors. This is just providing extra information; it
 4154 * is useful but not necessary.
4155 */
Max Reitz59c9a952014-10-22 17:00:15 +02004156 if (!file_pnum) {
4157 /* !file_pnum indicates an offset at or beyond the EOF; it is
4158 * perfectly valid for the format block driver to point to such
4159 * offsets, so catch it and mark everything as zero */
4160 ret |= BDRV_BLOCK_ZERO;
4161 } else {
4162 /* Limit request to the range reported by the protocol driver */
4163 *pnum = file_pnum;
4164 ret |= (ret2 & BDRV_BLOCK_ZERO);
4165 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02004166 }
4167 }
4168
Paolo Bonzini415b5b02013-09-04 19:00:31 +02004169 return ret;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004170}
4171
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004172/* Coroutine wrapper for bdrv_get_block_status() */
4173static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004174{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004175 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004176 BlockDriverState *bs = data->bs;
4177
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004178 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4179 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004180 data->done = true;
4181}
4182
4183/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004184 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004185 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004186 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004187 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004188int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4189 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00004190{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004191 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004192 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004193 .bs = bs,
4194 .sector_num = sector_num,
4195 .nb_sectors = nb_sectors,
4196 .pnum = pnum,
4197 .done = false,
4198 };
4199
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004200 if (qemu_in_coroutine()) {
4201 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004202 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004203 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004204 AioContext *aio_context = bdrv_get_aio_context(bs);
4205
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004206 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004207 qemu_coroutine_enter(co, &data);
4208 while (!data.done) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004209 aio_poll(aio_context, true);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004210 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00004211 }
4212 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00004213}
4214
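/*
 * Hypothetical usage sketch (not from the actual QEMU tree): classifying a
 * whole image extent by extent with bdrv_get_block_status(), in the spirit
 * of "qemu-img map".  example_dump_map and the 64k-sector chunk size are
 * illustrative assumptions.
 */
static void example_dump_map(BlockDriverState *bs)
{
    int64_t total = bdrv_nb_sectors(bs);
    int64_t sector = 0;

    while (total > 0 && sector < total) {
        int nb = MIN(total - sector, 65536);
        int pnum;
        int64_t ret = bdrv_get_block_status(bs, sector, nb, &pnum);

        if (ret < 0 || pnum == 0) {
            break;
        }
        fprintf(stderr, "%" PRId64 "+%d: %s\n", sector, pnum,
                (ret & BDRV_BLOCK_ZERO) ? "zero" :
                (ret & BDRV_BLOCK_DATA) ? "data" : "unallocated");
        sector += pnum;
    }
}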
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004215int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4216 int nb_sectors, int *pnum)
4217{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02004218 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4219 if (ret < 0) {
4220 return ret;
4221 }
Kevin Wolf01fb2702014-07-07 17:00:37 +02004222 return !!(ret & BDRV_BLOCK_ALLOCATED);
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02004223}
4224
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004225/*
4226 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4227 *
4228 * Return true if the given sector is allocated in any image between
4229 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4230 * sector is allocated in any image of the chain. Return false otherwise.
4231 *
4232 * 'pnum' is set to the number of sectors (including and immediately following
4233 * the specified sector) that are known to be in the same
4234 * allocated/unallocated state.
4235 *
4236 */
Paolo Bonzini4f578632013-09-04 19:00:24 +02004237int bdrv_is_allocated_above(BlockDriverState *top,
4238 BlockDriverState *base,
4239 int64_t sector_num,
4240 int nb_sectors, int *pnum)
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004241{
4242 BlockDriverState *intermediate;
4243 int ret, n = nb_sectors;
4244
4245 intermediate = top;
4246 while (intermediate && intermediate != base) {
4247 int pnum_inter;
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02004248 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4249 &pnum_inter);
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004250 if (ret < 0) {
4251 return ret;
4252 } else if (ret) {
4253 *pnum = pnum_inter;
4254 return 1;
4255 }
4256
4257 /*
4258 * [sector_num, nb_sectors] is unallocated on top but intermediate
4259 * might have
4260 *
 4261 * [sector_num+x, nb_sectors] allocated.
4262 */
Vishvananda Ishaya63ba17d2013-01-24 10:02:08 -08004263 if (n > pnum_inter &&
4264 (intermediate == top ||
4265 sector_num + pnum_inter < intermediate->total_sectors)) {
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02004266 n = pnum_inter;
4267 }
4268
4269 intermediate = intermediate->backing_hd;
4270 }
4271
4272 *pnum = n;
4273 return 0;
4274}
4275
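/*
 * Hypothetical usage sketch (not from the actual QEMU tree): counting how
 * many of the first nb_sectors sectors are allocated in 'top' or in one of
 * its backing files above 'base'.  example_count_allocated_above is an
 * illustrative name.
 */
static int64_t example_count_allocated_above(BlockDriverState *top,
                                             BlockDriverState *base,
                                             int64_t nb_sectors)
{
    int64_t sector = 0, count = 0;

    while (sector < nb_sectors) {
        int pnum;
        int ret = bdrv_is_allocated_above(top, base, sector,
                                          MIN(nb_sectors - sector, 65536),
                                          &pnum);
        if (ret < 0) {
            return ret;
        }
        if (pnum == 0) {
            break;
        }
        if (ret) {
            count += pnum;
        }
        sector += pnum;
    }
    return count;
}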
aliguori045df332009-03-05 23:00:48 +00004276const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4277{
4278 if (bs->backing_hd && bs->backing_hd->encrypted)
4279 return bs->backing_file;
4280 else if (bs->encrypted)
4281 return bs->filename;
4282 else
4283 return NULL;
4284}
4285
ths5fafdf22007-09-16 21:08:06 +00004286void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00004287 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00004288{
Kevin Wolf3574c602011-10-26 11:02:11 +02004289 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00004290}
4291
ths5fafdf22007-09-16 21:08:06 +00004292int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00004293 const uint8_t *buf, int nb_sectors)
4294{
4295 BlockDriver *drv = bs->drv;
Max Reitzb9c64942015-02-05 13:58:25 -05004296 int ret;
4297
4298 if (!drv) {
bellard19cb3732006-08-19 11:45:59 +00004299 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05004300 }
4301 if (!drv->bdrv_write_compressed) {
bellardfaea38e2006-08-05 21:31:00 +00004302 return -ENOTSUP;
Max Reitzb9c64942015-02-05 13:58:25 -05004303 }
4304 ret = bdrv_check_request(bs, sector_num, nb_sectors);
4305 if (ret < 0) {
4306 return ret;
4307 }
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004308
Fam Zhenge4654d22013-11-13 18:29:43 +08004309 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01004310
bellardfaea38e2006-08-05 21:31:00 +00004311 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4312}
ths3b46e622007-09-17 08:09:54 +00004313
bellardfaea38e2006-08-05 21:31:00 +00004314int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4315{
4316 BlockDriver *drv = bs->drv;
4317 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00004318 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00004319 if (!drv->bdrv_get_info)
4320 return -ENOTSUP;
4321 memset(bdi, 0, sizeof(*bdi));
4322 return drv->bdrv_get_info(bs, bdi);
4323}
4324
Max Reitzeae041f2013-10-09 10:46:16 +02004325ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4326{
4327 BlockDriver *drv = bs->drv;
4328 if (drv && drv->bdrv_get_specific_info) {
4329 return drv->bdrv_get_specific_info(bs);
4330 }
4331 return NULL;
4332}
4333
Christoph Hellwig45566e92009-07-10 23:11:57 +02004334int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4335 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004336{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004337 QEMUIOVector qiov;
4338 struct iovec iov = {
4339 .iov_base = (void *) buf,
4340 .iov_len = size,
4341 };
4342
4343 qemu_iovec_init_external(&qiov, &iov, 1);
4344 return bdrv_writev_vmstate(bs, &qiov, pos);
4345}
4346
4347int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4348{
aliguori178e08a2009-04-05 19:10:55 +00004349 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004350
4351 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004352 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004353 } else if (drv->bdrv_save_vmstate) {
4354 return drv->bdrv_save_vmstate(bs, qiov, pos);
4355 } else if (bs->file) {
4356 return bdrv_writev_vmstate(bs->file, qiov, pos);
4357 }
4358
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004359 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004360}
4361
Christoph Hellwig45566e92009-07-10 23:11:57 +02004362int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4363 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004364{
4365 BlockDriver *drv = bs->drv;
4366 if (!drv)
4367 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004368 if (drv->bdrv_load_vmstate)
4369 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4370 if (bs->file)
4371 return bdrv_load_vmstate(bs->file, buf, pos, size);
4372 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004373}
4374
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004375void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4376{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004377 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004378 return;
4379 }
4380
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004381 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004382}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004383
Kevin Wolf41c695c2012-12-06 14:32:58 +01004384int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4385 const char *tag)
4386{
4387 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4388 bs = bs->file;
4389 }
4390
4391 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4392 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4393 }
4394
4395 return -ENOTSUP;
4396}
4397
Fam Zheng4cc70e92013-11-20 10:01:54 +08004398int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4399{
4400 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4401 bs = bs->file;
4402 }
4403
4404 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4405 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4406 }
4407
4408 return -ENOTSUP;
4409}
4410
Kevin Wolf41c695c2012-12-06 14:32:58 +01004411int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4412{
Max Reitz938789e2014-03-10 23:44:08 +01004413 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004414 bs = bs->file;
4415 }
4416
4417 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4418 return bs->drv->bdrv_debug_resume(bs, tag);
4419 }
4420
4421 return -ENOTSUP;
4422}
4423
4424bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4425{
4426 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4427 bs = bs->file;
4428 }
4429
4430 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4431 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4432 }
4433
4434 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004435}
4436
Blue Swirl199630b2010-07-25 20:49:34 +00004437int bdrv_is_snapshot(BlockDriverState *bs)
4438{
4439 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4440}
4441
Jeff Codyb1b1d782012-10-16 15:49:09 -04004442/* backing_file can be relative, absolute, or a protocol. If it is relative,
 4443 * it must be relative to the chain. So bs->filename from a BDS must not be
 4444 * passed in as backing_file, as it may be relative to the CWD rather than
 4445 * to the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004446BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4447 const char *backing_file)
4448{
Jeff Codyb1b1d782012-10-16 15:49:09 -04004449 char *filename_full = NULL;
4450 char *backing_file_full = NULL;
4451 char *filename_tmp = NULL;
4452 int is_protocol = 0;
4453 BlockDriverState *curr_bs = NULL;
4454 BlockDriverState *retval = NULL;
4455
4456 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004457 return NULL;
4458 }
4459
Jeff Codyb1b1d782012-10-16 15:49:09 -04004460 filename_full = g_malloc(PATH_MAX);
4461 backing_file_full = g_malloc(PATH_MAX);
4462 filename_tmp = g_malloc(PATH_MAX);
4463
4464 is_protocol = path_has_protocol(backing_file);
4465
4466 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4467
4468 /* If either of the filename paths is actually a protocol, then
4469 * compare unmodified paths; otherwise make paths relative */
4470 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4471 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4472 retval = curr_bs->backing_hd;
4473 break;
4474 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004475 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04004476 /* If not an absolute filename path, make it relative to the current
4477 * image's filename path */
4478 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4479 backing_file);
4480
4481 /* We are going to compare absolute pathnames */
4482 if (!realpath(filename_tmp, filename_full)) {
4483 continue;
4484 }
4485
4486 /* We need to make sure the backing filename we are comparing against
4487 * is relative to the current image filename (or absolute) */
4488 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4489 curr_bs->backing_file);
4490
4491 if (!realpath(filename_tmp, backing_file_full)) {
4492 continue;
4493 }
4494
4495 if (strcmp(backing_file_full, filename_full) == 0) {
4496 retval = curr_bs->backing_hd;
4497 break;
4498 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004499 }
4500 }
4501
Jeff Codyb1b1d782012-10-16 15:49:09 -04004502 g_free(filename_full);
4503 g_free(backing_file_full);
4504 g_free(filename_tmp);
4505 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004506}
4507
Benoît Canetf198fd12012-08-02 10:22:47 +02004508int bdrv_get_backing_file_depth(BlockDriverState *bs)
4509{
4510 if (!bs->drv) {
4511 return 0;
4512 }
4513
4514 if (!bs->backing_hd) {
4515 return 0;
4516 }
4517
4518 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4519}
4520
bellard83f64092006-08-01 16:21:11 +00004521/**************************************************************/
4522/* async I/Os */
4523
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004524BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4525 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004526 BlockCompletionFunc *cb, void *opaque)
aliguori3b69e4b2009-01-22 16:59:24 +00004527{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004528 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4529
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004530 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004531 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004532}
4533
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004534BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4535 QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004536 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004537{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004538 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4539
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004540 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004541 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004542}
4543
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004544BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004545 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004546 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004547{
4548 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4549
4550 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4551 BDRV_REQ_ZERO_WRITE | flags,
4552 cb, opaque, true);
4553}
4554
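/*
 * Hypothetical usage sketch (not from the actual QEMU tree): starting one
 * asynchronous read.  example_read_done and the int status pointer are
 * illustrative; a real caller must keep the qiov and its buffers alive until
 * the completion callback has run.
 */
static void example_read_done(void *opaque, int ret)
{
    /* ret is 0 on success or a negative errno value */
    *(int *)opaque = ret;
}

static BlockAIOCB *example_start_read(BlockDriverState *bs, int64_t sector,
                                      QEMUIOVector *qiov, int *status)
{
    return bdrv_aio_readv(bs, sector, qiov, qiov->size >> BDRV_SECTOR_BITS,
                          example_read_done, status);
}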
Kevin Wolf40b4f532009-09-09 17:53:37 +02004555
4556typedef struct MultiwriteCB {
4557 int error;
4558 int num_requests;
4559 int num_callbacks;
4560 struct {
Markus Armbruster097310b2014-10-07 13:59:15 +02004561 BlockCompletionFunc *cb;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004562 void *opaque;
4563 QEMUIOVector *free_qiov;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004564 } callbacks[];
4565} MultiwriteCB;
4566
4567static void multiwrite_user_cb(MultiwriteCB *mcb)
4568{
4569 int i;
4570
4571 for (i = 0; i < mcb->num_callbacks; i++) {
4572 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004573 if (mcb->callbacks[i].free_qiov) {
4574 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4575 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004576 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004577 }
4578}
4579
4580static void multiwrite_cb(void *opaque, int ret)
4581{
4582 MultiwriteCB *mcb = opaque;
4583
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004584 trace_multiwrite_cb(mcb, ret);
4585
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004586 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004587 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004588 }
4589
4590 mcb->num_requests--;
4591 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004592 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004593 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004594 }
4595}
4596
4597static int multiwrite_req_compare(const void *a, const void *b)
4598{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004599 const BlockRequest *req1 = a, *req2 = b;
4600
4601 /*
4602 * Note that we can't simply subtract req2->sector from req1->sector
4603 * here as that could overflow the return value.
4604 */
4605 if (req1->sector > req2->sector) {
4606 return 1;
4607 } else if (req1->sector < req2->sector) {
4608 return -1;
4609 } else {
4610 return 0;
4611 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004612}
4613
4614/*
4615 * Takes a bunch of requests and tries to merge them. Returns the number of
4616 * requests that remain after merging.
4617 */
4618static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4619 int num_reqs, MultiwriteCB *mcb)
4620{
4621 int i, outidx;
4622
4623 // Sort requests by start sector
4624 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4625
 4626 // Check if adjacent requests touch the same clusters. If so, combine them.
 4627 // Only exactly sequential or overlapping requests are merged; gaps are not filled.
4628 outidx = 0;
4629 for (i = 1; i < num_reqs; i++) {
4630 int merge = 0;
4631 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4632
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004633 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004634 if (reqs[i].sector <= oldreq_last) {
4635 merge = 1;
4636 }
4637
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004638 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4639 merge = 0;
4640 }
4641
Peter Lieven6c5a42a2014-10-27 10:18:46 +01004642 if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
4643 reqs[i].nb_sectors > bs->bl.max_transfer_length) {
4644 merge = 0;
4645 }
4646
Kevin Wolf40b4f532009-09-09 17:53:37 +02004647 if (merge) {
4648 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004649 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004650 qemu_iovec_init(qiov,
4651 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4652
4653 // Add the first request to the merged one. If the requests are
4654 // overlapping, drop the last sectors of the first request.
4655 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004656 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004657
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004658 // We should not need to add any zeros between the two requests
4659 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004660
4661 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004662 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004663
Stefan Hajnoczi391827e2014-07-30 09:53:30 +01004664 // Add tail of first request, if necessary
4665 if (qiov->size < reqs[outidx].qiov->size) {
4666 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4667 reqs[outidx].qiov->size - qiov->size);
4668 }
4669
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004670 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004671 reqs[outidx].qiov = qiov;
4672
4673 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4674 } else {
4675 outidx++;
4676 reqs[outidx].sector = reqs[i].sector;
4677 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4678 reqs[outidx].qiov = reqs[i].qiov;
4679 }
4680 }
4681
Peter Lievenf4564d52015-02-02 14:52:18 +01004682 block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
4683
Kevin Wolf40b4f532009-09-09 17:53:37 +02004684 return outidx + 1;
4685}
4686
4687/*
4688 * Submit multiple AIO write requests at once.
4689 *
4690 * On success, the function returns 0 and all requests in the reqs array have
 4691 * been submitted. On error, this function returns -1, and the requests may
 4692 * or may not have been submitted. In particular, this means that the callback
 4693 * will be called for some of the requests and not for others. The
4694 * caller must check the error field of the BlockRequest to wait for the right
4695 * callbacks (if error != 0, no callback will be called).
4696 *
4697 * The implementation may modify the contents of the reqs array, e.g. to merge
4698 * requests. However, the fields opaque and error are left unmodified as they
4699 * are used to signal failure for a single request to the caller.
4700 */
4701int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4702{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004703 MultiwriteCB *mcb;
4704 int i;
4705
Ryan Harper301db7c2011-03-07 10:01:04 -06004706 /* don't submit writes if we don't have a medium */
4707 if (bs->drv == NULL) {
4708 for (i = 0; i < num_reqs; i++) {
4709 reqs[i].error = -ENOMEDIUM;
4710 }
4711 return -1;
4712 }
4713
Kevin Wolf40b4f532009-09-09 17:53:37 +02004714 if (num_reqs == 0) {
4715 return 0;
4716 }
4717
4718 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004719 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004720 mcb->num_requests = 0;
4721 mcb->num_callbacks = num_reqs;
4722
4723 for (i = 0; i < num_reqs; i++) {
4724 mcb->callbacks[i].cb = reqs[i].cb;
4725 mcb->callbacks[i].opaque = reqs[i].opaque;
4726 }
4727
4728 // Check for mergable requests
4729 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4730
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004731 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4732
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004733 /* Run the aio requests. */
4734 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004735 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004736 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4737 reqs[i].nb_sectors, reqs[i].flags,
4738 multiwrite_cb, mcb,
4739 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004740 }
4741
4742 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004743}
4744
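/*
 * Hypothetical usage sketch (not from the actual QEMU tree): batching two
 * writes through bdrv_aio_multiwrite().  The sector numbers, the callback
 * and the example_* names are illustrative; device emulation that batches
 * guest requests builds the BlockRequest array in much the same way.
 */
static void example_batch_cb(void *opaque, int ret)
{
    /* Called once per original request, with that request's result */
}

static int example_submit_batch(BlockDriverState *bs,
                                QEMUIOVector *qiov0, QEMUIOVector *qiov1)
{
    BlockRequest reqs[2] = {
        {
            .sector     = 0,
            .nb_sectors = qiov0->size >> BDRV_SECTOR_BITS,
            .qiov       = qiov0,
            .cb         = example_batch_cb,
        },
        {
            .sector     = 2048,
            .nb_sectors = qiov1->size >> BDRV_SECTOR_BITS,
            .qiov       = qiov1,
            .cb         = example_batch_cb,
        },
    };

    return bdrv_aio_multiwrite(bs, reqs, 2);
}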
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004745void bdrv_aio_cancel(BlockAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004746{
Fam Zhengca5fd112014-09-11 13:41:27 +08004747 qemu_aio_ref(acb);
4748 bdrv_aio_cancel_async(acb);
4749 while (acb->refcnt > 1) {
4750 if (acb->aiocb_info->get_aio_context) {
4751 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
4752 } else if (acb->bs) {
4753 aio_poll(bdrv_get_aio_context(acb->bs), true);
4754 } else {
4755 abort();
Fam Zheng02c50ef2014-09-11 13:41:09 +08004756 }
Fam Zheng02c50ef2014-09-11 13:41:09 +08004757 }
Fam Zheng80074292014-09-11 13:41:28 +08004758 qemu_aio_unref(acb);
Fam Zheng02c50ef2014-09-11 13:41:09 +08004759}
4760
 4761/* Async version of aio cancel. The caller is not blocked if the acb implements
 4762 * cancel_async; otherwise we do nothing and let the request complete normally.
4763 * In either case the completion callback must be called. */
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004764void bdrv_aio_cancel_async(BlockAIOCB *acb)
Fam Zheng02c50ef2014-09-11 13:41:09 +08004765{
4766 if (acb->aiocb_info->cancel_async) {
4767 acb->aiocb_info->cancel_async(acb);
4768 }
bellard83f64092006-08-01 16:21:11 +00004769}
4770
4771/**************************************************************/
4772/* async block device emulation */
4773
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004774typedef struct BlockAIOCBSync {
4775 BlockAIOCB common;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004776 QEMUBH *bh;
4777 int ret;
4778 /* vector translation state */
4779 QEMUIOVector *qiov;
4780 uint8_t *bounce;
4781 int is_write;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004782} BlockAIOCBSync;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004783
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004784static const AIOCBInfo bdrv_em_aiocb_info = {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004785 .aiocb_size = sizeof(BlockAIOCBSync),
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004786};
4787
bellard83f64092006-08-01 16:21:11 +00004788static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004789{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004790 BlockAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004791
Kevin Wolf857d4f42014-05-20 13:16:51 +02004792 if (!acb->is_write && acb->ret >= 0) {
Michael Tokarev03396142012-06-07 20:17:55 +04004793 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
Kevin Wolf857d4f42014-05-20 13:16:51 +02004794 }
aliguoriceb42de2009-04-07 18:43:28 +00004795 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004796 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004797 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004798 acb->bh = NULL;
Fam Zheng80074292014-09-11 13:41:28 +08004799 qemu_aio_unref(acb);
bellardbeac80c2006-06-26 20:08:57 +00004800}
bellardbeac80c2006-06-26 20:08:57 +00004801
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004802static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4803 int64_t sector_num,
4804 QEMUIOVector *qiov,
4805 int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004806 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004807 void *opaque,
4808 int is_write)
aliguorif141eaf2009-04-07 18:43:24 +00004809
bellardea2384d2004-08-01 21:59:26 +00004810{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004811 BlockAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004812
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004813 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004814 acb->is_write = is_write;
4815 acb->qiov = qiov;
Kevin Wolf857d4f42014-05-20 13:16:51 +02004816 acb->bounce = qemu_try_blockalign(bs, qiov->size);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004817 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004818
Kevin Wolf857d4f42014-05-20 13:16:51 +02004819 if (acb->bounce == NULL) {
4820 acb->ret = -ENOMEM;
4821 } else if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004822 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004823 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004824 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004825 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004826 }
4827
pbrookce1a14d2006-08-07 02:38:06 +00004828 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004829
pbrookce1a14d2006-08-07 02:38:06 +00004830 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004831}
4832
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004833static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004834 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004835 BlockCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004836{
aliguorif141eaf2009-04-07 18:43:24 +00004837 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004838}
4839
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004840static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
aliguorif141eaf2009-04-07 18:43:24 +00004841 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004842 BlockCompletionFunc *cb, void *opaque)
aliguorif141eaf2009-04-07 18:43:24 +00004843{
4844 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4845}
4846
Kevin Wolf68485422011-06-30 10:05:46 +02004847
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004848typedef struct BlockAIOCBCoroutine {
4849 BlockAIOCB common;
Kevin Wolf68485422011-06-30 10:05:46 +02004850 BlockRequest req;
4851 bool is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004852 bool *done;
Kevin Wolf68485422011-06-30 10:05:46 +02004853 QEMUBH* bh;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004854} BlockAIOCBCoroutine;
Kevin Wolf68485422011-06-30 10:05:46 +02004855
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004856static const AIOCBInfo bdrv_em_co_aiocb_info = {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004857 .aiocb_size = sizeof(BlockAIOCBCoroutine),
Kevin Wolf68485422011-06-30 10:05:46 +02004858};
4859
Paolo Bonzini35246a62011-10-14 10:41:29 +02004860static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004861{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004862 BlockAIOCBCoroutine *acb = opaque;
Kevin Wolf68485422011-06-30 10:05:46 +02004863
4864 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004865
Kevin Wolf68485422011-06-30 10:05:46 +02004866 qemu_bh_delete(acb->bh);
Fam Zheng80074292014-09-11 13:41:28 +08004867 qemu_aio_unref(acb);
Kevin Wolf68485422011-06-30 10:05:46 +02004868}
4869
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004870/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4871static void coroutine_fn bdrv_co_do_rw(void *opaque)
4872{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004873 BlockAIOCBCoroutine *acb = opaque;
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004874 BlockDriverState *bs = acb->common.bs;
4875
4876 if (!acb->is_write) {
4877 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004878 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004879 } else {
4880 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004881 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004882 }
4883
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004884 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004885 qemu_bh_schedule(acb->bh);
4886}
4887
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004888static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4889 int64_t sector_num,
4890 QEMUIOVector *qiov,
4891 int nb_sectors,
4892 BdrvRequestFlags flags,
Markus Armbruster097310b2014-10-07 13:59:15 +02004893 BlockCompletionFunc *cb,
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004894 void *opaque,
4895 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004896{
4897 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004898 BlockAIOCBCoroutine *acb;
Kevin Wolf68485422011-06-30 10:05:46 +02004899
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004900 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004901 acb->req.sector = sector_num;
4902 acb->req.nb_sectors = nb_sectors;
4903 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004904 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004905 acb->is_write = is_write;
4906
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004907 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004908 qemu_coroutine_enter(co, acb);
4909
4910 return &acb->common;
4911}
4912
Paolo Bonzini07f07612011-10-17 12:32:12 +02004913static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004914{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004915 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004916 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004917
Paolo Bonzini07f07612011-10-17 12:32:12 +02004918 acb->req.error = bdrv_co_flush(bs);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004919 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004920 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004921}
4922
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004923BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004924 BlockCompletionFunc *cb, void *opaque)
Alexander Graf016f5cf2010-05-26 17:51:49 +02004925{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004926 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004927
Paolo Bonzini07f07612011-10-17 12:32:12 +02004928 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004929 BlockAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004930
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004931 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004932
Paolo Bonzini07f07612011-10-17 12:32:12 +02004933 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4934 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004935
Alexander Graf016f5cf2010-05-26 17:51:49 +02004936 return &acb->common;
4937}
4938
Paolo Bonzini4265d622011-10-17 12:32:14 +02004939static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4940{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004941 BlockAIOCBCoroutine *acb = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004942 BlockDriverState *bs = acb->common.bs;
4943
4944 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02004945 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004946 qemu_bh_schedule(acb->bh);
4947}
4948
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004949BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
Paolo Bonzini4265d622011-10-17 12:32:14 +02004950 int64_t sector_num, int nb_sectors,
Markus Armbruster097310b2014-10-07 13:59:15 +02004951 BlockCompletionFunc *cb, void *opaque)
Paolo Bonzini4265d622011-10-17 12:32:14 +02004952{
4953 Coroutine *co;
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004954 BlockAIOCBCoroutine *acb;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004955
4956 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4957
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004958 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004959 acb->req.sector = sector_num;
4960 acb->req.nb_sectors = nb_sectors;
4961 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4962 qemu_coroutine_enter(co, acb);
4963
4964 return &acb->common;
4965}
4966
bellardea2384d2004-08-01 21:59:26 +00004967void bdrv_init(void)
4968{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004969 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004970}
pbrookce1a14d2006-08-07 02:38:06 +00004971
Markus Armbrustereb852012009-10-27 18:41:44 +01004972void bdrv_init_with_whitelist(void)
4973{
4974 use_bdrv_whitelist = 1;
4975 bdrv_init();
4976}
4977
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004978void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Markus Armbruster097310b2014-10-07 13:59:15 +02004979 BlockCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004980{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004981 BlockAIOCB *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004982
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004983 acb = g_slice_alloc(aiocb_info->aiocb_size);
4984 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004985 acb->bs = bs;
4986 acb->cb = cb;
4987 acb->opaque = opaque;
Fam Zhengf197fe22014-09-11 13:41:08 +08004988 acb->refcnt = 1;
pbrookce1a14d2006-08-07 02:38:06 +00004989 return acb;
4990}
4991
Fam Zhengf197fe22014-09-11 13:41:08 +08004992void qemu_aio_ref(void *p)
4993{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02004994 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08004995 acb->refcnt++;
4996}
4997
Fam Zheng80074292014-09-11 13:41:28 +08004998void qemu_aio_unref(void *p)
pbrookce1a14d2006-08-07 02:38:06 +00004999{
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005000 BlockAIOCB *acb = p;
Fam Zhengf197fe22014-09-11 13:41:08 +08005001 assert(acb->refcnt > 0);
5002 if (--acb->refcnt == 0) {
5003 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
5004 }
pbrookce1a14d2006-08-07 02:38:06 +00005005}
bellard19cb3732006-08-19 11:45:59 +00005006
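/*
 * Hypothetical usage sketch (not from the actual QEMU tree): how a driver
 * declares its own AIOCB type and allocates it through qemu_aio_get().
 * ExampleAIOCB and example_aiocb_info are illustrative names; real drivers
 * put their per-request state after the embedded 'common' field.
 */
typedef struct ExampleAIOCB {
    BlockAIOCB common;
    int my_state;
} ExampleAIOCB;

static const AIOCBInfo example_aiocb_info = {
    .aiocb_size = sizeof(ExampleAIOCB),
};

static ExampleAIOCB *example_aiocb_new(BlockDriverState *bs,
                                       BlockCompletionFunc *cb, void *opaque)
{
    ExampleAIOCB *acb = qemu_aio_get(&example_aiocb_info, bs, cb, opaque);

    acb->my_state = 0;
    return acb;
}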
5007/**************************************************************/
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005008/* Coroutine block device emulation */
5009
5010typedef struct CoroutineIOCompletion {
5011 Coroutine *coroutine;
5012 int ret;
5013} CoroutineIOCompletion;
5014
5015static void bdrv_co_io_em_complete(void *opaque, int ret)
5016{
5017 CoroutineIOCompletion *co = opaque;
5018
5019 co->ret = ret;
5020 qemu_coroutine_enter(co->coroutine, NULL);
5021}
5022
5023static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
5024 int nb_sectors, QEMUIOVector *iov,
5025 bool is_write)
5026{
5027 CoroutineIOCompletion co = {
5028 .coroutine = qemu_coroutine_self(),
5029 };
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005030 BlockAIOCB *acb;
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005031
5032 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01005033 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
5034 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005035 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01005036 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
5037 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005038 }
5039
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01005040 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005041 if (!acb) {
5042 return -EIO;
5043 }
5044 qemu_coroutine_yield();
5045
5046 return co.ret;
5047}
5048
5049static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
5050 int64_t sector_num, int nb_sectors,
5051 QEMUIOVector *iov)
5052{
5053 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
5054}
5055
5056static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
5057 int64_t sector_num, int nb_sectors,
5058 QEMUIOVector *iov)
5059{
5060 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
5061}
5062
Paolo Bonzini07f07612011-10-17 12:32:12 +02005063static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005064{
Paolo Bonzini07f07612011-10-17 12:32:12 +02005065 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005066
Paolo Bonzini07f07612011-10-17 12:32:12 +02005067 rwco->ret = bdrv_co_flush(rwco->bs);
5068}
5069
5070int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
5071{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005072 int ret;
5073
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005074 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02005075 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005076 }
5077
Kevin Wolfca716362011-11-10 18:13:59 +01005078 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfbf736fe2013-06-05 15:17:55 +02005079 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005080 if (bs->drv->bdrv_co_flush_to_os) {
5081 ret = bs->drv->bdrv_co_flush_to_os(bs);
5082 if (ret < 0) {
5083 return ret;
5084 }
5085 }
5086
Kevin Wolfca716362011-11-10 18:13:59 +01005087 /* But don't actually force it to the disk with cache=unsafe */
5088 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02005089 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01005090 }
5091
Kevin Wolfbf736fe2013-06-05 15:17:55 +02005092 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01005093 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005094 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005095 } else if (bs->drv->bdrv_aio_flush) {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005096 BlockAIOCB *acb;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005097 CoroutineIOCompletion co = {
5098 .coroutine = qemu_coroutine_self(),
5099 };
5100
5101 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
5102 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005103 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005104 } else {
5105 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005106 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02005107 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02005108 } else {
5109 /*
5110 * Some block drivers always operate in either writethrough or unsafe
 5111 * mode and therefore don't support bdrv_flush. Usually qemu doesn't
5112 * know how the server works (because the behaviour is hardcoded or
5113 * depends on server-side configuration), so we can't ensure that
5114 * everything is safe on disk. Returning an error doesn't work because
5115 * that would break guests even if the server operates in writethrough
5116 * mode.
5117 *
5118 * Let's hope the user knows what he's doing.
5119 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005120 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005121 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005122 if (ret < 0) {
5123 return ret;
5124 }
5125
5126 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
5127 * in the case of cache=unsafe, so there are no useless flushes.
5128 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02005129flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01005130 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005131}
5132
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005133void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005134{
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005135 Error *local_err = NULL;
5136 int ret;
5137
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005138 if (!bs->drv) {
5139 return;
Anthony Liguori0f154232011-11-14 15:09:45 -06005140 }
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005141
Alexey Kardashevskiy7ea2d262014-10-09 13:50:46 +11005142 if (!(bs->open_flags & BDRV_O_INCOMING)) {
5143 return;
5144 }
5145 bs->open_flags &= ~BDRV_O_INCOMING;
5146
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005147 if (bs->drv->bdrv_invalidate_cache) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005148 bs->drv->bdrv_invalidate_cache(bs, &local_err);
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005149 } else if (bs->file) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005150 bdrv_invalidate_cache(bs->file, &local_err);
5151 }
5152 if (local_err) {
5153 error_propagate(errp, local_err);
5154 return;
Kevin Wolf3456a8d2014-03-11 10:58:39 +01005155 }
5156
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005157 ret = refresh_total_sectors(bs, bs->total_sectors);
5158 if (ret < 0) {
5159 error_setg_errno(errp, -ret, "Could not refresh total sector count");
5160 return;
5161 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005162}
5163
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005164void bdrv_invalidate_cache_all(Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06005165{
5166 BlockDriverState *bs;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005167 Error *local_err = NULL;
Anthony Liguori0f154232011-11-14 15:09:45 -06005168
Benoît Canetdc364f42014-01-23 21:31:32 +01005169 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005170 AioContext *aio_context = bdrv_get_aio_context(bs);
5171
5172 aio_context_acquire(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005173 bdrv_invalidate_cache(bs, &local_err);
Stefan Hajnoczied78cda2014-05-08 16:34:35 +02005174 aio_context_release(aio_context);
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01005175 if (local_err) {
5176 error_propagate(errp, local_err);
5177 return;
5178 }
Anthony Liguori0f154232011-11-14 15:09:45 -06005179 }
5180}
5181
Paolo Bonzini07f07612011-10-17 12:32:12 +02005182int bdrv_flush(BlockDriverState *bs)
5183{
5184 Coroutine *co;
5185 RwCo rwco = {
5186 .bs = bs,
5187 .ret = NOT_DONE,
5188 };
5189
5190 if (qemu_in_coroutine()) {
5191 /* Fast-path if already in coroutine context */
5192 bdrv_flush_co_entry(&rwco);
5193 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005194 AioContext *aio_context = bdrv_get_aio_context(bs);
5195
Paolo Bonzini07f07612011-10-17 12:32:12 +02005196 co = qemu_coroutine_create(bdrv_flush_co_entry);
5197 qemu_coroutine_enter(co, &rwco);
5198 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005199 aio_poll(aio_context, true);
Paolo Bonzini07f07612011-10-17 12:32:12 +02005200 }
5201 }
5202
5203 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02005204}
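
/* Illustrative sketch, not part of the original file: how a caller outside of
 * coroutine context might use the synchronous wrapper above.  bdrv_flush()
 * either runs bdrv_flush_co_entry() directly (coroutine fast path) or spawns
 * it in a new coroutine and polls the AioContext until RwCo.ret leaves
 * NOT_DONE.  The error reporting below is an assumed example, not existing
 * code in this file.
 */
static int example_flush_and_report(BlockDriverState *bs)
{
    int ret = bdrv_flush(bs);   /* blocks until the flush coroutine finishes */

    if (ret < 0) {
        error_report("flush of '%s' failed: %s",
                     bdrv_get_device_name(bs), strerror(-ret));
    }
    return ret;
}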
5205
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005206typedef struct DiscardCo {
5207 BlockDriverState *bs;
5208 int64_t sector_num;
5209 int nb_sectors;
5210 int ret;
5211} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005212static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5213{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005214 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005215
5216 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5217}
5218
5219int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5220 int nb_sectors)
5221{
Max Reitzb9c64942015-02-05 13:58:25 -05005222 int max_discard, ret;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005223
Paolo Bonzini4265d622011-10-17 12:32:14 +02005224 if (!bs->drv) {
5225 return -ENOMEDIUM;
Max Reitzb9c64942015-02-05 13:58:25 -05005226 }
5227
5228 ret = bdrv_check_request(bs, sector_num, nb_sectors);
5229 if (ret < 0) {
5230 return ret;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005231 } else if (bs->read_only) {
5232 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005233 }
5234
Fam Zhenge4654d22013-11-13 18:29:43 +08005235 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01005236
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01005237 /* Do nothing if disabled. */
5238 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5239 return 0;
5240 }
5241
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005242 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005243 return 0;
5244 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005245
Peter Lieven75af1f32015-02-06 11:54:11 +01005246 max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005247 while (nb_sectors > 0) {
5248 int ret;
5249 int num = nb_sectors;
5250
5251 /* align request */
5252 if (bs->bl.discard_alignment &&
5253 num >= bs->bl.discard_alignment &&
5254 sector_num % bs->bl.discard_alignment) {
5255 if (num > bs->bl.discard_alignment) {
5256 num = bs->bl.discard_alignment;
5257 }
5258 num -= sector_num % bs->bl.discard_alignment;
5259 }
5260
5261 /* limit request size */
5262 if (num > max_discard) {
5263 num = max_discard;
5264 }
5265
5266 if (bs->drv->bdrv_co_discard) {
5267 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5268 } else {
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005269 BlockAIOCB *acb;
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005270 CoroutineIOCompletion co = {
5271 .coroutine = qemu_coroutine_self(),
5272 };
5273
5274 acb = bs->drv->bdrv_aio_discard(bs, sector_num, num,
5275 bdrv_co_io_em_complete, &co);
5276 if (acb == NULL) {
5277 return -EIO;
5278 } else {
5279 qemu_coroutine_yield();
5280 ret = co.ret;
5281 }
5282 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01005283 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01005284 return ret;
5285 }
5286
5287 sector_num += num;
5288 nb_sectors -= num;
5289 }
5290 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02005291}
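
/* Illustrative sketch, not part of the original file: the chunking rule used
 * by the loop above, factored out as a helper.  Worked example, assuming
 * bl.discard_alignment == 128 and max_discard == 32768: a request starting at
 * sector 100 for 1000 sectors yields a first chunk of 28 sectors (up to the
 * next 128-sector boundary) and a second, aligned chunk for the remaining 972.
 */
static int example_next_discard_chunk(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int max_discard)
{
    int num = nb_sectors;

    /* Shrink the first chunk so that the next one starts aligned */
    if (bs->bl.discard_alignment &&
        num >= bs->bl.discard_alignment &&
        sector_num % bs->bl.discard_alignment) {
        if (num > bs->bl.discard_alignment) {
            num = bs->bl.discard_alignment;
        }
        num -= sector_num % bs->bl.discard_alignment;
    }

    /* Never exceed the driver's advertised limit */
    return MIN(num, max_discard);
}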
5292
5293int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5294{
5295 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01005296 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02005297 .bs = bs,
5298 .sector_num = sector_num,
5299 .nb_sectors = nb_sectors,
5300 .ret = NOT_DONE,
5301 };
5302
5303 if (qemu_in_coroutine()) {
5304 /* Fast-path if already in coroutine context */
5305 bdrv_discard_co_entry(&rwco);
5306 } else {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005307 AioContext *aio_context = bdrv_get_aio_context(bs);
5308
Paolo Bonzini4265d622011-10-17 12:32:14 +02005309 co = qemu_coroutine_create(bdrv_discard_co_entry);
5310 qemu_coroutine_enter(co, &rwco);
5311 while (rwco.ret == NOT_DONE) {
Stefan Hajnoczi2572b372014-05-08 16:34:34 +02005312 aio_poll(aio_context, true);
Paolo Bonzini4265d622011-10-17 12:32:14 +02005313 }
5314 }
5315
5316 return rwco.ret;
5317}
5318
Kevin Wolff9f05dc2011-07-15 13:50:26 +02005319/**************************************************************/
bellard19cb3732006-08-19 11:45:59 +00005320/* removable device support */
5321
5322/**
5323 * Return TRUE if the media is present
5324 */
5325int bdrv_is_inserted(BlockDriverState *bs)
5326{
5327 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005328
bellard19cb3732006-08-19 11:45:59 +00005329 if (!drv)
5330 return 0;
5331 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02005332 return 1;
5333 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00005334}
5335
5336/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005337 * Return whether the media changed since the last call to this
5338 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00005339 */
5340int bdrv_media_changed(BlockDriverState *bs)
5341{
5342 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005343
Markus Armbruster8e49ca42011-08-03 15:08:08 +02005344 if (drv && drv->bdrv_media_changed) {
5345 return drv->bdrv_media_changed(bs);
5346 }
5347 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005348}
5349
5350/**
5351 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5352 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005353void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005354{
5355 BlockDriver *drv = bs->drv;
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005356 const char *device_name;
bellard19cb3732006-08-19 11:45:59 +00005357
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005358 if (drv && drv->bdrv_eject) {
5359 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005360 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005361
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005362 device_name = bdrv_get_device_name(bs);
5363 if (device_name[0] != '\0') {
5364 qapi_event_send_device_tray_moved(device_name,
Wenchao Xiaa5ee7bd2014-06-18 08:43:44 +02005365 eject_flag, &error_abort);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005366 }
bellard19cb3732006-08-19 11:45:59 +00005367}
5368
bellard19cb3732006-08-19 11:45:59 +00005369/**
5370 * Lock or unlock the media (if it is locked, the user won't be able
5371 * to eject it manually).
5372 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005373void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005374{
5375 BlockDriver *drv = bs->drv;
5376
Markus Armbruster025e8492011-09-06 18:58:47 +02005377 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005378
Markus Armbruster025e8492011-09-06 18:58:47 +02005379 if (drv && drv->bdrv_lock_medium) {
5380 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005381 }
5382}
ths985a03b2007-12-24 16:10:43 +00005383
5384/* needed for generic scsi interface */
5385
5386int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5387{
5388 BlockDriver *drv = bs->drv;
5389
5390 if (drv && drv->bdrv_ioctl)
5391 return drv->bdrv_ioctl(bs, req, buf);
5392 return -ENOTSUP;
5393}
aliguori7d780662009-03-12 19:57:08 +00005394
Markus Armbruster7c84b1b2014-10-07 13:59:14 +02005395BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
aliguori221f7152009-03-28 17:28:41 +00005396 unsigned long int req, void *buf,
Markus Armbruster097310b2014-10-07 13:59:15 +02005397 BlockCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005398{
aliguori221f7152009-03-28 17:28:41 +00005399 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005400
aliguori221f7152009-03-28 17:28:41 +00005401 if (drv && drv->bdrv_aio_ioctl)
5402 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5403 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005404}
aliguorie268ca52009-04-22 20:20:00 +00005405
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005406void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005407{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005408 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005409}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005410
aliguorie268ca52009-04-22 20:20:00 +00005411void *qemu_blockalign(BlockDriverState *bs, size_t size)
5412{
Kevin Wolf339064d2013-11-28 10:23:32 +01005413 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005414}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005415
Max Reitz9ebd8442014-10-22 14:09:27 +02005416void *qemu_blockalign0(BlockDriverState *bs, size_t size)
5417{
5418 return memset(qemu_blockalign(bs, size), 0, size);
5419}
5420
Kevin Wolf7d2a35c2014-05-20 12:24:05 +02005421void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5422{
5423 size_t align = bdrv_opt_mem_align(bs);
5424
5425 /* Ensure that NULL is never returned on success */
5426 assert(align > 0);
5427 if (size == 0) {
5428 size = align;
5429 }
5430
5431 return qemu_try_memalign(align, size);
5432}
5433
Max Reitz9ebd8442014-10-22 14:09:27 +02005434void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
5435{
5436 void *mem = qemu_try_blockalign(bs, size);
5437
5438 if (mem) {
5439 memset(mem, 0, size);
5440 }
5441
5442 return mem;
5443}
5444
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005445/*
5446 * Check if all buffer addresses and lengths in this vector are aligned to
 * the memory alignment required by the block driver (bdrv_opt_mem_align()).
5447 */
5448bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5449{
5450 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005451 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005452
5453 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005454 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005455 return false;
5456 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005457 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005458 return false;
5459 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005460 }
5461
5462 return true;
5463}
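
/* Illustrative sketch, not part of the original file: building a vector that
 * passes bdrv_qiov_is_aligned() by allocating the buffer with
 * qemu_blockalign(), which uses the same bdrv_opt_mem_align() value.  The
 * caller is assumed to release it later with qemu_iovec_destroy() and
 * qemu_vfree().
 */
static bool example_build_aligned_qiov(BlockDriverState *bs,
                                       QEMUIOVector *qiov, size_t len)
{
    void *buf = qemu_blockalign(bs, len);   /* aligned base address */

    qemu_iovec_init(qiov, 1);
    qemu_iovec_add(qiov, buf, len);

    /* Also requires len to be a multiple of the alignment */
    return bdrv_qiov_is_aligned(bs, qiov);
}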
5464
Fam Zhengb8afb522014-04-16 09:34:30 +08005465BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5466 Error **errp)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005467{
5468 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005469 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005470
Paolo Bonzini50717e92013-01-21 17:09:45 +01005471 assert((granularity & (granularity - 1)) == 0);
5472
Fam Zhenge4654d22013-11-13 18:29:43 +08005473 granularity >>= BDRV_SECTOR_BITS;
5474 assert(granularity);
Markus Armbruster57322b72014-06-26 13:23:22 +02005475 bitmap_size = bdrv_nb_sectors(bs);
Fam Zhengb8afb522014-04-16 09:34:30 +08005476 if (bitmap_size < 0) {
5477 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5478 errno = -bitmap_size;
5479 return NULL;
5480 }
Markus Armbruster5839e532014-08-19 10:31:08 +02005481 bitmap = g_new0(BdrvDirtyBitmap, 1);
Stefan Hajnoczi786a4ea2015-03-23 15:29:26 +00005482 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(granularity));
Fam Zhenge4654d22013-11-13 18:29:43 +08005483 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5484 return bitmap;
5485}
5486
5487void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5488{
5489 BdrvDirtyBitmap *bm, *next;
5490 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5491 if (bm == bitmap) {
5492 QLIST_REMOVE(bitmap, list);
5493 hbitmap_free(bitmap->bitmap);
5494 g_free(bitmap);
5495 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005496 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005497 }
5498}
5499
Fam Zheng21b56832013-11-13 18:29:44 +08005500BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5501{
5502 BdrvDirtyBitmap *bm;
5503 BlockDirtyInfoList *list = NULL;
5504 BlockDirtyInfoList **plist = &list;
5505
5506 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
Markus Armbruster5839e532014-08-19 10:31:08 +02005507 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5508 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
Fam Zheng21b56832013-11-13 18:29:44 +08005509 info->count = bdrv_get_dirty_count(bs, bm);
5510 info->granularity =
5511 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5512 entry->value = info;
5513 *plist = entry;
5514 plist = &entry->next;
5515 }
5516
5517 return list;
5518}
5519
Fam Zhenge4654d22013-11-13 18:29:43 +08005520int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005521{
Fam Zhenge4654d22013-11-13 18:29:43 +08005522 if (bitmap) {
5523 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005524 } else {
5525 return 0;
5526 }
5527}
5528
Fam Zhenge4654d22013-11-13 18:29:43 +08005529void bdrv_dirty_iter_init(BlockDriverState *bs,
5530 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005531{
Fam Zhenge4654d22013-11-13 18:29:43 +08005532 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005533}
5534
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005535void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5536 int64_t cur_sector, int nr_sectors)
5537{
5538 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5539}
5540
5541void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
5542 int64_t cur_sector, int nr_sectors)
5543{
5544 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5545}
5546
5547static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5548 int nr_sectors)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005549{
Fam Zhenge4654d22013-11-13 18:29:43 +08005550 BdrvDirtyBitmap *bitmap;
5551 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5552 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005553 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005554}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005555
Vladimir Sementsov-Ogievskiyc4237df2014-11-27 12:40:46 +03005556static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
5557 int nr_sectors)
Fam Zhenge4654d22013-11-13 18:29:43 +08005558{
5559 BdrvDirtyBitmap *bitmap;
5560 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5561 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5562 }
5563}
5564
5565int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5566{
5567 return hbitmap_count(bitmap->bitmap);
5568}
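
/* Illustrative sketch, not part of the original file: the typical lifecycle of
 * a dirty bitmap as used by block jobs.  The 64 KiB granularity is an assumed
 * example value; it must be a power of two, given in bytes.
 */
static void example_dirty_bitmap_roundtrip(BlockDriverState *bs, Error **errp)
{
    BdrvDirtyBitmap *bitmap = bdrv_create_dirty_bitmap(bs, 65536, errp);

    if (!bitmap) {
        return;
    }

    /* Mark sectors 0..127 dirty and query them back */
    bdrv_set_dirty_bitmap(bs, bitmap, 0, 128);
    assert(bdrv_get_dirty(bs, bitmap, 0));
    assert(bdrv_get_dirty_count(bs, bitmap) >= 128);

    bdrv_release_dirty_bitmap(bs, bitmap);
}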
5569
Fam Zheng9fcb0252013-08-23 09:14:46 +08005570/* Get a reference to bs */
5571void bdrv_ref(BlockDriverState *bs)
5572{
5573 bs->refcnt++;
5574}
5575
5576/* Release a previously grabbed reference to bs.
5577 * If, after releasing, the reference count is zero, the BlockDriverState is
5578 * deleted. */
5579void bdrv_unref(BlockDriverState *bs)
5580{
Jeff Cody9a4d5ca2014-07-23 17:22:57 -04005581 if (!bs) {
5582 return;
5583 }
Fam Zheng9fcb0252013-08-23 09:14:46 +08005584 assert(bs->refcnt > 0);
5585 if (--bs->refcnt == 0) {
5586 bdrv_delete(bs);
5587 }
5588}
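
/* Illustrative sketch, not part of the original file: the ref/unref pairing a
 * caller uses to keep a BlockDriverState alive across an operation that may
 * drop other references to it.
 */
static void example_use_bs_safely(BlockDriverState *bs)
{
    bdrv_ref(bs);       /* refcnt++, so bdrv_delete() cannot run yet */

    /* ... work that might release other references to bs ... */

    bdrv_unref(bs);     /* refcnt--, deletes bs once it reaches zero */
}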
5589
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005590struct BdrvOpBlocker {
5591 Error *reason;
5592 QLIST_ENTRY(BdrvOpBlocker) list;
5593};
5594
5595bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5596{
5597 BdrvOpBlocker *blocker;
5598 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5599 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5600 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5601 if (errp) {
5602 error_setg(errp, "Device '%s' is busy: %s",
Markus Armbrusterbfb197e2014-10-07 13:59:11 +02005603 bdrv_get_device_name(bs),
5604 error_get_pretty(blocker->reason));
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005605 }
5606 return true;
5607 }
5608 return false;
5609}
5610
5611void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5612{
5613 BdrvOpBlocker *blocker;
5614 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5615
Markus Armbruster5839e532014-08-19 10:31:08 +02005616 blocker = g_new0(BdrvOpBlocker, 1);
Fam Zhengfbe40ff2014-05-23 21:29:42 +08005617 blocker->reason = reason;
5618 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5619}
5620
5621void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5622{
5623 BdrvOpBlocker *blocker, *next;
5624 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5625 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5626 if (blocker->reason == reason) {
5627 QLIST_REMOVE(blocker, list);
5628 g_free(blocker);
5629 }
5630 }
5631}
5632
5633void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5634{
5635 int i;
5636 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5637 bdrv_op_block(bs, i, reason);
5638 }
5639}
5640
5641void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5642{
5643 int i;
5644 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5645 bdrv_op_unblock(bs, i, reason);
5646 }
5647}
5648
5649bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5650{
5651 int i;
5652
5653 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5654 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5655 return false;
5656 }
5657 }
5658 return true;
5659}
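
/* Illustrative sketch, not part of the original file: how a block job might
 * install and remove an operation blocker.  The Error object doubles as the
 * key that bdrv_op_unblock() matches on; the reason text is an assumption.
 */
static void example_block_replace_op(BlockDriverState *bs)
{
    Error *reason = NULL;
    Error *local_err = NULL;

    error_setg(&reason, "node is in use by an example job");
    bdrv_op_block(bs, BLOCK_OP_TYPE_REPLACE, reason);

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_REPLACE, &local_err)) {
        error_report_err(local_err);    /* "Device '...' is busy: ..." */
    }

    bdrv_op_unblock(bs, BLOCK_OP_TYPE_REPLACE, reason);
    error_free(reason);
}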
5660
Luiz Capitulino28a72822011-09-26 17:43:50 -03005661void bdrv_iostatus_enable(BlockDriverState *bs)
5662{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005663 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005664 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005665}
5666
5667/* The I/O status is only enabled if the drive explicitly
5668 * enables it _and_ the VM is configured to stop on errors */
5669bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5670{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005671 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005672 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5673 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5674 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005675}
5676
5677void bdrv_iostatus_disable(BlockDriverState *bs)
5678{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005679 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005680}
5681
5682void bdrv_iostatus_reset(BlockDriverState *bs)
5683{
5684 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005685 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005686 if (bs->job) {
5687 block_job_iostatus_reset(bs->job);
5688 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005689 }
5690}
5691
Luiz Capitulino28a72822011-09-26 17:43:50 -03005692void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5693{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005694 assert(bdrv_iostatus_is_enabled(bs));
5695 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005696 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5697 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005698 }
5699}
5700
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005701void bdrv_img_create(const char *filename, const char *fmt,
5702 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005703 char *options, uint64_t img_size, int flags,
5704 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005705{
Chunyan Liu83d05212014-06-05 17:20:51 +08005706 QemuOptsList *create_opts = NULL;
5707 QemuOpts *opts = NULL;
5708 const char *backing_fmt, *backing_file;
5709 int64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005710 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005711 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005712 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005713 int ret = 0;
5714
5715 /* Find driver and parse its options */
5716 drv = bdrv_find_format(fmt);
5717 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005718 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005719 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005720 }
5721
Max Reitzb65a5e12015-02-05 13:58:12 -05005722 proto_drv = bdrv_find_protocol(filename, true, errp);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005723 if (!proto_drv) {
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005724 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005725 }
5726
Max Reitzc6149722014-12-02 18:32:45 +01005727 if (!drv->create_opts) {
5728 error_setg(errp, "Format driver '%s' does not support image creation",
5729 drv->format_name);
5730 return;
5731 }
5732
5733 if (!proto_drv->create_opts) {
5734 error_setg(errp, "Protocol driver '%s' does not support image creation",
5735 proto_drv->format_name);
5736 return;
5737 }
5738
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005739 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5740 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005741
5742 /* Create parameter list with default values */
Chunyan Liu83d05212014-06-05 17:20:51 +08005743 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
Markus Armbruster39101f22015-02-12 16:46:36 +01005744 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005745
5746 /* Parse -o options */
5747 if (options) {
Markus Armbrusterdc523cd342015-02-12 18:37:11 +01005748 qemu_opts_do_parse(opts, options, NULL, &local_err);
5749 if (local_err) {
5750 error_report_err(local_err);
5751 local_err = NULL;
Chunyan Liu83d05212014-06-05 17:20:51 +08005752 error_setg(errp, "Invalid options for file format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005753 goto out;
5754 }
5755 }
5756
5757 if (base_filename) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005758 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005759 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005760 error_setg(errp, "Backing file not supported for file format '%s'",
5761 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005762 goto out;
5763 }
5764 }
5765
5766 if (base_fmt) {
Markus Armbrusterf43e47d2015-02-12 17:52:20 +01005767 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
Markus Armbruster6be41942015-02-12 17:49:02 +01005768 if (local_err) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005769 error_setg(errp, "Backing file format not supported for file "
5770 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005771 goto out;
5772 }
5773 }
5774
Chunyan Liu83d05212014-06-05 17:20:51 +08005775 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5776 if (backing_file) {
5777 if (!strcmp(filename, backing_file)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005778 error_setg(errp, "Trying to create an image with the "
5779 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005780 goto out;
5781 }
5782 }
5783
Chunyan Liu83d05212014-06-05 17:20:51 +08005784 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5785 if (backing_fmt) {
5786 backing_drv = bdrv_find_format(backing_fmt);
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005787 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005788 error_setg(errp, "Unknown backing file format '%s'",
Chunyan Liu83d05212014-06-05 17:20:51 +08005789 backing_fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005790 goto out;
5791 }
5792 }
5793
5794 /* The size for the image must always be specified, with one exception:
5795 * if we are using a backing file, we can obtain the size from there. */
Chunyan Liu83d05212014-06-05 17:20:51 +08005796 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5797 if (size == -1) {
5798 if (backing_file) {
Max Reitz66f6b812013-12-03 14:57:52 +01005799 BlockDriverState *bs;
Max Reitz29168012014-11-26 17:20:27 +01005800 char *full_backing = g_new0(char, PATH_MAX);
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005801 int64_t size;
Paolo Bonzini63090da2012-04-12 14:01:03 +02005802 int back_flags;
5803
Max Reitz29168012014-11-26 17:20:27 +01005804 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
5805 full_backing, PATH_MAX,
5806 &local_err);
5807 if (local_err) {
5808 g_free(full_backing);
5809 goto out;
5810 }
5811
Paolo Bonzini63090da2012-04-12 14:01:03 +02005812 /* backing files always opened read-only */
5813 back_flags =
5814 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005815
Max Reitzf67503e2014-02-18 18:33:05 +01005816 bs = NULL;
Max Reitz29168012014-11-26 17:20:27 +01005817 ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005818 backing_drv, &local_err);
Max Reitz29168012014-11-26 17:20:27 +01005819 g_free(full_backing);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005820 if (ret < 0) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005821 goto out;
5822 }
Markus Armbruster52bf1e72014-06-26 13:23:25 +02005823 size = bdrv_getlength(bs);
5824 if (size < 0) {
5825 error_setg_errno(errp, -size, "Could not get size of '%s'",
5826 backing_file);
5827 bdrv_unref(bs);
5828 goto out;
5829 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005830
Markus Armbruster39101f22015-02-12 16:46:36 +01005831 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
Max Reitz66f6b812013-12-03 14:57:52 +01005832
5833 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005834 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005835 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005836 goto out;
5837 }
5838 }
5839
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005840 if (!quiet) {
Fam Zheng43c5d8f2014-12-09 15:38:04 +08005841 printf("Formatting '%s', fmt=%s", filename, fmt);
5842 qemu_opts_print(opts, " ");
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005843 puts("");
5844 }
Chunyan Liu83d05212014-06-05 17:20:51 +08005845
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005846 ret = bdrv_create(drv, filename, opts, &local_err);
Chunyan Liu83d05212014-06-05 17:20:51 +08005847
Max Reitzcc84d902013-09-06 17:14:26 +02005848 if (ret == -EFBIG) {
5849 /* This is generally a better message than whatever the driver would
5850 * deliver (especially because of the cluster_size_hint), since that
5851 * is most probably not much different from "image too large". */
5852 const char *cluster_size_hint = "";
Chunyan Liu83d05212014-06-05 17:20:51 +08005853 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
Max Reitzcc84d902013-09-06 17:14:26 +02005854 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005855 }
Max Reitzcc84d902013-09-06 17:14:26 +02005856 error_setg(errp, "The image size is too large for file format '%s'"
5857 "%s", fmt, cluster_size_hint);
5858 error_free(local_err);
5859 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005860 }
5861
5862out:
Chunyan Liu83d05212014-06-05 17:20:51 +08005863 qemu_opts_del(opts);
5864 qemu_opts_free(create_opts);
Markus Armbruster84d18f02014-01-30 15:07:28 +01005865 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005866 error_propagate(errp, local_err);
5867 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005868}
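
/* Illustrative sketch, not part of the original file: creating a 1 GiB qcow2
 * image much like qemu-img would.  The file name and the option string are
 * assumed example values; the flags argument only affects how a backing file
 * would be probed, so 0 is sufficient here.
 */
static void example_create_qcow2(Error **errp)
{
    /* the options parameter is not const, so use a writable buffer */
    char options[] = "cluster_size=65536";

    bdrv_img_create("/tmp/example.qcow2", "qcow2",
                    NULL, NULL,                 /* no backing file */
                    options, 1024 * 1024 * 1024ULL,
                    0, errp, true /* quiet */);
}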
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005869
5870AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5871{
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005872 return bs->aio_context;
5873}
5874
5875void bdrv_detach_aio_context(BlockDriverState *bs)
5876{
Max Reitz33384422014-06-20 21:57:33 +02005877 BdrvAioNotifier *baf;
5878
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005879 if (!bs->drv) {
5880 return;
5881 }
5882
Max Reitz33384422014-06-20 21:57:33 +02005883 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
5884 baf->detach_aio_context(baf->opaque);
5885 }
5886
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +02005887 if (bs->io_limits_enabled) {
5888 throttle_detach_aio_context(&bs->throttle_state);
5889 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005890 if (bs->drv->bdrv_detach_aio_context) {
5891 bs->drv->bdrv_detach_aio_context(bs);
5892 }
5893 if (bs->file) {
5894 bdrv_detach_aio_context(bs->file);
5895 }
5896 if (bs->backing_hd) {
5897 bdrv_detach_aio_context(bs->backing_hd);
5898 }
5899
5900 bs->aio_context = NULL;
5901}
5902
5903void bdrv_attach_aio_context(BlockDriverState *bs,
5904 AioContext *new_context)
5905{
Max Reitz33384422014-06-20 21:57:33 +02005906 BdrvAioNotifier *ban;
5907
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005908 if (!bs->drv) {
5909 return;
5910 }
5911
5912 bs->aio_context = new_context;
5913
5914 if (bs->backing_hd) {
5915 bdrv_attach_aio_context(bs->backing_hd, new_context);
5916 }
5917 if (bs->file) {
5918 bdrv_attach_aio_context(bs->file, new_context);
5919 }
5920 if (bs->drv->bdrv_attach_aio_context) {
5921 bs->drv->bdrv_attach_aio_context(bs, new_context);
5922 }
Stefan Hajnoczi13af91e2014-05-14 16:22:45 +02005923 if (bs->io_limits_enabled) {
5924 throttle_attach_aio_context(&bs->throttle_state, new_context);
5925 }
Max Reitz33384422014-06-20 21:57:33 +02005926
5927 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
5928 ban->attached_aio_context(new_context, ban->opaque);
5929 }
Stefan Hajnoczidcd04222014-05-08 16:34:37 +02005930}
5931
5932void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
5933{
5934 bdrv_drain_all(); /* ensure there are no in-flight requests */
5935
5936 bdrv_detach_aio_context(bs);
5937
5938 /* This function executes in the old AioContext so acquire the new one in
5939 * case it runs in a different thread.
5940 */
5941 aio_context_acquire(new_context);
5942 bdrv_attach_aio_context(bs, new_context);
5943 aio_context_release(new_context);
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005944}
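
/* Illustrative sketch, not part of the original file: once a BDS has been
 * moved to another AioContext (for example an IOThread's), code running in
 * the main loop has to wrap its accesses in acquire/release, just like
 * bdrv_invalidate_cache_all() above does.
 */
static int example_flush_in_context(BlockDriverState *bs)
{
    AioContext *ctx = bdrv_get_aio_context(bs);
    int ret;

    aio_context_acquire(ctx);
    ret = bdrv_flush(bs);
    aio_context_release(ctx);

    return ret;
}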
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005945
Max Reitz33384422014-06-20 21:57:33 +02005946void bdrv_add_aio_context_notifier(BlockDriverState *bs,
5947 void (*attached_aio_context)(AioContext *new_context, void *opaque),
5948 void (*detach_aio_context)(void *opaque), void *opaque)
5949{
5950 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
5951 *ban = (BdrvAioNotifier){
5952 .attached_aio_context = attached_aio_context,
5953 .detach_aio_context = detach_aio_context,
5954 .opaque = opaque
5955 };
5956
5957 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
5958}
5959
5960void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
5961 void (*attached_aio_context)(AioContext *,
5962 void *),
5963 void (*detach_aio_context)(void *),
5964 void *opaque)
5965{
5966 BdrvAioNotifier *ban, *ban_next;
5967
5968 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5969 if (ban->attached_aio_context == attached_aio_context &&
5970 ban->detach_aio_context == detach_aio_context &&
5971 ban->opaque == opaque)
5972 {
5973 QLIST_REMOVE(ban, list);
5974 g_free(ban);
5975
5976 return;
5977 }
5978 }
5979
5980 abort();
5981}
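
/* Illustrative sketch, not part of the original file: a component that keeps
 * per-AioContext resources re-creates them whenever the BDS changes threads.
 * The callback names and the state struct are assumptions for illustration.
 */
typedef struct ExampleNotifierState {
    void *per_context_resource;     /* e.g. a timer created in the context */
} ExampleNotifierState;

static void example_attached_aio_context(AioContext *new_context, void *opaque)
{
    /* (re-)create resources bound to new_context */
}

static void example_detach_aio_context(void *opaque)
{
    /* tear down resources created in the old context */
}

static void example_register_notifier(BlockDriverState *bs,
                                      ExampleNotifierState *s)
{
    bdrv_add_aio_context_notifier(bs, example_attached_aio_context,
                                  example_detach_aio_context, s);
}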
5982
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005983void bdrv_add_before_write_notifier(BlockDriverState *bs,
5984 NotifierWithReturn *notifier)
5985{
5986 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5987}
Max Reitz6f176b42013-09-03 10:09:50 +02005988
Max Reitz77485432014-10-27 11:12:50 +01005989int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
5990 BlockDriverAmendStatusCB *status_cb)
Max Reitz6f176b42013-09-03 10:09:50 +02005991{
Chunyan Liuc282e1f2014-06-05 17:21:11 +08005992 if (!bs->drv->bdrv_amend_options) {
Max Reitz6f176b42013-09-03 10:09:50 +02005993 return -ENOTSUP;
5994 }
Max Reitz77485432014-10-27 11:12:50 +01005995 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
Max Reitz6f176b42013-09-03 10:09:50 +02005996}
Benoît Canetf6186f42013-10-02 14:33:48 +02005997
Benoît Canetb5042a32014-03-03 19:11:34 +01005998/* This function will be called by the bdrv_recurse_is_first_non_filter method
5999 * of block filters and by bdrv_is_first_non_filter.
6000 * It is used to test whether the given bs is the candidate or to recurse
6001 * further down the node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01006002 */
Benoît Canet212a5a82014-01-23 21:31:36 +01006003bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
6004 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02006005{
Benoît Canetb5042a32014-03-03 19:11:34 +01006006 /* return false if the basic checks fail */
6007 if (!bs || !bs->drv) {
6008 return false;
6009 }
6010
6011 /* the code reached a non block filter driver -> check if the bs is
6012 * the same as the candidate. It's the recursion termination condition.
6013 */
6014 if (!bs->drv->is_filter) {
6015 return bs == candidate;
6016 }
6017 /* Down this path the driver is a block filter driver */
6018
6019 /* If the block filter recursion method is defined use it to recurse down
6020 * the node graph.
6021 */
6022 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01006023 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
6024 }
6025
Benoît Canetb5042a32014-03-03 19:11:34 +01006026 /* the driver is a block filter but does not allow recursion -> return false
6027 */
6028 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01006029}
6030
6031/* This function checks if the candidate is the first non-filter bs down its
6032 * bs chain. Since we don't have pointers to parents it explores all bs chains
6033 * from the top. Some filters can choose not to pass down the recursion.
6034 */
6035bool bdrv_is_first_non_filter(BlockDriverState *candidate)
6036{
6037 BlockDriverState *bs;
6038
6039 /* walk down the bs forest recursively */
6040 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
6041 bool perm;
6042
Benoît Canetb5042a32014-03-03 19:11:34 +01006043 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01006044 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01006045
6046 /* candidate is the first non filter */
6047 if (perm) {
6048 return true;
6049 }
6050 }
6051
6052 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02006053}
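
/* Illustrative sketch, not part of the original file: what a simple filter
 * driver with a single child (bs->file) would plug into
 * .bdrv_recurse_is_first_non_filter.  Filters with several children (quorum,
 * for instance) recurse into each of them instead.
 */
static bool example_filter_recurse_is_first_non_filter(BlockDriverState *bs,
                                                       BlockDriverState *candidate)
{
    /* a pure filter is never the candidate itself; keep walking down */
    return bdrv_recurse_is_first_non_filter(bs->file, candidate);
}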
Benoît Canet09158f02014-06-27 18:25:25 +02006054
6055BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
6056{
6057 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006058 AioContext *aio_context;
6059
Benoît Canet09158f02014-06-27 18:25:25 +02006060 if (!to_replace_bs) {
6061 error_setg(errp, "Node name '%s' not found", node_name);
6062 return NULL;
6063 }
6064
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006065 aio_context = bdrv_get_aio_context(to_replace_bs);
6066 aio_context_acquire(aio_context);
6067
Benoît Canet09158f02014-06-27 18:25:25 +02006068 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006069 to_replace_bs = NULL;
6070 goto out;
Benoît Canet09158f02014-06-27 18:25:25 +02006071 }
6072
6073 /* We don't want an arbitrary node of the BDS chain to be replaced, only the
6074 * topmost non-filter, in order to prevent data corruption.
6075 * Another benefit is that this test excludes backing files which are
6076 * blocked by the backing blockers.
6077 */
6078 if (!bdrv_is_first_non_filter(to_replace_bs)) {
6079 error_setg(errp, "Only top most non filter can be replaced");
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006080 to_replace_bs = NULL;
6081 goto out;
Benoît Canet09158f02014-06-27 18:25:25 +02006082 }
6083
Stefan Hajnoczi5a7e7a02014-10-21 12:03:58 +01006084out:
6085 aio_context_release(aio_context);
Benoît Canet09158f02014-06-27 18:25:25 +02006086 return to_replace_bs;
6087}
Ming Lei448ad912014-07-04 18:04:33 +08006088
6089void bdrv_io_plug(BlockDriverState *bs)
6090{
6091 BlockDriver *drv = bs->drv;
6092 if (drv && drv->bdrv_io_plug) {
6093 drv->bdrv_io_plug(bs);
6094 } else if (bs->file) {
6095 bdrv_io_plug(bs->file);
6096 }
6097}
6098
6099void bdrv_io_unplug(BlockDriverState *bs)
6100{
6101 BlockDriver *drv = bs->drv;
6102 if (drv && drv->bdrv_io_unplug) {
6103 drv->bdrv_io_unplug(bs);
6104 } else if (bs->file) {
6105 bdrv_io_unplug(bs->file);
6106 }
6107}
6108
6109void bdrv_flush_io_queue(BlockDriverState *bs)
6110{
6111 BlockDriver *drv = bs->drv;
6112 if (drv && drv->bdrv_flush_io_queue) {
6113 drv->bdrv_flush_io_queue(bs);
6114 } else if (bs->file) {
6115 bdrv_flush_io_queue(bs->file);
6116 }
6117}
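
/* Illustrative sketch, not part of the original file: batching several
 * asynchronous writes between plug and unplug so that a driver which supports
 * it (e.g. linux-aio) can submit them to the host in one go.  The caller is
 * assumed to provide the completion callback and sector-aligned vectors, and
 * the use of bdrv_aio_writev() here is an assumption about the caller's needs.
 */
static void example_submit_write_batch(BlockDriverState *bs,
                                       QEMUIOVector *qiovs, int64_t *sectors,
                                       int count, BlockCompletionFunc *cb,
                                       void *opaque)
{
    int i;

    bdrv_io_plug(bs);
    for (i = 0; i < count; i++) {
        bdrv_aio_writev(bs, sectors[i], &qiovs[i],
                        qiovs[i].size / BDRV_SECTOR_SIZE, cb, opaque);
    }
    bdrv_io_unplug(bs);     /* lets the driver flush its queued requests */
}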
Max Reitz91af7012014-07-18 20:24:56 +02006118
6119static bool append_open_options(QDict *d, BlockDriverState *bs)
6120{
6121 const QDictEntry *entry;
6122 bool found_any = false;
6123
6124 for (entry = qdict_first(bs->options); entry;
6125 entry = qdict_next(bs->options, entry))
6126 {
6127 /* Only take options for this level and exclude all non-driver-specific
6128 * options */
6129 if (!strchr(qdict_entry_key(entry), '.') &&
6130 strcmp(qdict_entry_key(entry), "node-name"))
6131 {
6132 qobject_incref(qdict_entry_value(entry));
6133 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
6134 found_any = true;
6135 }
6136 }
6137
6138 return found_any;
6139}
6140
6141/* Updates the following BDS fields:
6142 * - exact_filename: A filename which may be used for opening a block device
6143 * which (mostly) equals the given BDS (even without any
6144 * other options; so reading and writing must return the same
6145 * results, but caching etc. may be different)
6146 * - full_open_options: Options which, when given when opening a block device
6147 * (without a filename), result in a BDS (mostly)
6148 * equalling the given one
6149 * - filename: If exact_filename is set, it is copied here. Otherwise,
6150 * full_open_options is converted to a JSON object, prefixed with
6151 * "json:" (for use through the JSON pseudo protocol) and put here.
6152 */
6153void bdrv_refresh_filename(BlockDriverState *bs)
6154{
6155 BlockDriver *drv = bs->drv;
6156 QDict *opts;
6157
6158 if (!drv) {
6159 return;
6160 }
6161
6162 /* This BDS's file name will most probably depend on its file's name, so
6163 * refresh that first */
6164 if (bs->file) {
6165 bdrv_refresh_filename(bs->file);
6166 }
6167
6168 if (drv->bdrv_refresh_filename) {
6169 /* Obsolete information is of no use here, so drop the old file name
6170 * information before refreshing it */
6171 bs->exact_filename[0] = '\0';
6172 if (bs->full_open_options) {
6173 QDECREF(bs->full_open_options);
6174 bs->full_open_options = NULL;
6175 }
6176
6177 drv->bdrv_refresh_filename(bs);
6178 } else if (bs->file) {
6179 /* Try to reconstruct valid information from the underlying file */
6180 bool has_open_options;
6181
6182 bs->exact_filename[0] = '\0';
6183 if (bs->full_open_options) {
6184 QDECREF(bs->full_open_options);
6185 bs->full_open_options = NULL;
6186 }
6187
6188 opts = qdict_new();
6189 has_open_options = append_open_options(opts, bs);
6190
6191 /* If no specific options have been given for this BDS, the filename of
6192 * the underlying file should suffice for this one as well */
6193 if (bs->file->exact_filename[0] && !has_open_options) {
6194 strcpy(bs->exact_filename, bs->file->exact_filename);
6195 }
6196 /* Reconstructing the full options QDict is simple for most format block
6197 * drivers, as long as the full options are known for the underlying
6198 * file BDS. The full options QDict of that file BDS should somehow
6199 * contain a representation of the filename, therefore the following
6200 * suffices without querying the (exact_)filename of this BDS. */
6201 if (bs->file->full_open_options) {
6202 qdict_put_obj(opts, "driver",
6203 QOBJECT(qstring_from_str(drv->format_name)));
6204 QINCREF(bs->file->full_open_options);
6205 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
6206
6207 bs->full_open_options = opts;
6208 } else {
6209 QDECREF(opts);
6210 }
6211 } else if (!bs->full_open_options && qdict_size(bs->options)) {
6212 /* There is no underlying file BDS (at least referenced by BDS.file),
6213 * so the full options QDict should be equal to the options given
6214 * specifically for this block device when it was opened (plus the
6215 * driver specification).
6216 * Because those options don't change, there is no need to update
6217 * full_open_options when it's already set. */
6218
6219 opts = qdict_new();
6220 append_open_options(opts, bs);
6221 qdict_put_obj(opts, "driver",
6222 QOBJECT(qstring_from_str(drv->format_name)));
6223
6224 if (bs->exact_filename[0]) {
6225 /* This may not work for all block protocol drivers (some may
6226 * require this filename to be parsed), but we have to find some
6227 * default solution here, so just include it. If some block driver
6228 * does not support pure options without any filename at all or
6229 * needs some special format of the options QDict, it needs to
6230 * implement the driver-specific bdrv_refresh_filename() function.
6231 */
6232 qdict_put_obj(opts, "filename",
6233 QOBJECT(qstring_from_str(bs->exact_filename)));
6234 }
6235
6236 bs->full_open_options = opts;
6237 }
6238
6239 if (bs->exact_filename[0]) {
6240 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
6241 } else if (bs->full_open_options) {
6242 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
6243 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
6244 qstring_get_str(json));
6245 QDECREF(json);
6246 }
6247}
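
/* Illustrative sketch, not part of the original file: the shape of a
 * driver-specific .bdrv_refresh_filename implementation for a hypothetical
 * protocol driver whose state is just a host and an export name.  The struct,
 * the driver name and the URL syntax are assumptions for illustration.
 */
typedef struct ExampleProtoState {
    const char *host;
    const char *export;
} ExampleProtoState;

static void example_proto_refresh_filename(BlockDriverState *bs)
{
    ExampleProtoState *s = bs->opaque;
    QDict *opts = qdict_new();

    /* A plain string form, used when no exotic options are involved */
    snprintf(bs->exact_filename, sizeof(bs->exact_filename),
             "exampleproto://%s/%s", s->host, s->export);

    /* The options form, from which the "json:" filename is generated */
    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("exampleproto")));
    qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
    qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(s->export)));
    bs->full_open_options = opts;
}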
Benoît Canet5366d0c2014-09-05 15:46:18 +02006248
6249/* This accessor function's purpose is to allow the device models to access the
6250 * BlockAcctStats structure embedded inside a BlockDriverState without being
6251 * aware of the BlockDriverState structure layout.
6252 * It will go away when the BlockAcctStats structure is moved inside
6253 * the device models.
6254 */
6255BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6256{
6257 return &bs->stats;
6258}