blob: df2b8d1b4117eed82e4764b5fd43c1ba94115ad2 [file] [log] [blame]
bellardfc01f7e2003-06-30 10:03:06 +00001/*
2 * QEMU System Emulator block driver
ths5fafdf22007-09-16 21:08:06 +00003 *
bellardfc01f7e2003-06-30 10:03:06 +00004 * Copyright (c) 2003 Fabrice Bellard
ths5fafdf22007-09-16 21:08:06 +00005 *
bellardfc01f7e2003-06-30 10:03:06 +00006 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
blueswir13990d092008-12-05 17:53:21 +000024#include "config-host.h"
pbrookfaf07962007-11-11 02:51:17 +000025#include "qemu-common.h"
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +010026#include "trace.h"
Paolo Bonzini83c90892012-12-17 18:19:49 +010027#include "monitor/monitor.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010028#include "block/block_int.h"
29#include "block/blockjob.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010030#include "qemu/module.h"
Paolo Bonzini7b1b5d12012-12-17 18:19:43 +010031#include "qapi/qmp/qjson.h"
Paolo Bonzini9c17d612012-12-17 18:20:04 +010032#include "sysemu/sysemu.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010033#include "qemu/notify.h"
Paolo Bonzini737e1502012-12-17 18:19:44 +010034#include "block/coroutine.h"
Benoît Canetc13163f2014-01-23 21:31:34 +010035#include "block/qapi.h"
Luiz Capitulinob2023812011-09-21 17:16:47 -030036#include "qmp-commands.h"
Paolo Bonzini1de7afc2012-12-17 18:20:00 +010037#include "qemu/timer.h"
bellardfc01f7e2003-06-30 10:03:06 +000038
Juan Quintela71e72a12009-07-27 16:12:56 +020039#ifdef CONFIG_BSD
bellard7674e7b2005-04-26 21:59:26 +000040#include <sys/types.h>
41#include <sys/stat.h>
42#include <sys/ioctl.h>
Blue Swirl72cf2d42009-09-12 07:36:22 +000043#include <sys/queue.h>
blueswir1c5e97232009-03-07 20:06:23 +000044#ifndef __DragonFly__
bellard7674e7b2005-04-26 21:59:26 +000045#include <sys/disk.h>
46#endif
blueswir1c5e97232009-03-07 20:06:23 +000047#endif
bellard7674e7b2005-04-26 21:59:26 +000048
aliguori49dc7682009-03-08 16:26:59 +000049#ifdef _WIN32
50#include <windows.h>
51#endif
52
Fam Zhenge4654d22013-11-13 18:29:43 +080053struct BdrvDirtyBitmap {
54 HBitmap *bitmap;
55 QLIST_ENTRY(BdrvDirtyBitmap) list;
56};
57
/*
 * Sentinel result value used while an emulated synchronous operation is
 * still in progress (e.g. bdrv_create() polls until its result differs).
 */
#define NOT_DONE 0x7fffffff
59
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +020060static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
aliguorif141eaf2009-04-07 18:43:24 +000061static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
62 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
aliguoric87c0672009-04-07 18:43:20 +000063 BlockDriverCompletionFunc *cb, void *opaque);
aliguorif141eaf2009-04-07 18:43:24 +000064static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
65 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +000066 BlockDriverCompletionFunc *cb, void *opaque);
Kevin Wolff9f05dc2011-07-15 13:50:26 +020067static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
68 int64_t sector_num, int nb_sectors,
69 QEMUIOVector *iov);
70static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
71 int64_t sector_num, int nb_sectors,
72 QEMUIOVector *iov);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010073static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
74 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczi470c0502012-01-18 14:40:42 +000075 BdrvRequestFlags flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +010076static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
77 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +000078 BdrvRequestFlags flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010079static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
80 int64_t sector_num,
81 QEMUIOVector *qiov,
82 int nb_sectors,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +010083 BdrvRequestFlags flags,
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010084 BlockDriverCompletionFunc *cb,
85 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +010086 bool is_write);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +010087static void coroutine_fn bdrv_co_do_rw(void *opaque);
Kevin Wolf621f0582012-03-20 15:12:58 +010088static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +020089 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
bellardec530c82006-04-25 22:36:06 +000090
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +010091static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
92 QTAILQ_HEAD_INITIALIZER(bdrv_states);
blueswir17ee930d2008-09-17 19:04:14 +000093
Benoît Canetdc364f42014-01-23 21:31:32 +010094static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
95 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
96
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +010097static QLIST_HEAD(, BlockDriver) bdrv_drivers =
98 QLIST_HEAD_INITIALIZER(bdrv_drivers);
bellardea2384d2004-08-01 21:59:26 +000099
Markus Armbrustereb852012009-10-27 18:41:44 +0100100/* If non-zero, use only whitelisted block drivers */
101static int use_bdrv_whitelist;
102
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000103#ifdef _WIN32
/*
 * Return non-zero if @filename starts with an ASCII letter immediately
 * followed by ':' (for example "c:"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter;

    if (letter >= 'a' && letter <= 'z') {
        is_letter = 1;
    } else {
        is_letter = (letter >= 'A' && letter <= 'Z');
    }

    /* the second character is only inspected after a letter matched */
    return is_letter && filename[1] == ':';
}
110
/*
 * Return 1 if @filename is exactly a drive letter plus ':' (nothing after
 * it), or if it begins with one of the two device-prefix spellings tested
 * below; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* bare drive specification such as "d:" */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* device prefix, written with backslashes or with forward slashes */
    return strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
121#endif
122
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800123/* throttling disk I/O limits */
Benoît Canetcc0681c2013-09-02 14:14:39 +0200124void bdrv_set_io_limits(BlockDriverState *bs,
125 ThrottleConfig *cfg)
126{
127 int i;
128
129 throttle_config(&bs->throttle_state, cfg);
130
131 for (i = 0; i < 2; i++) {
132 qemu_co_enter_next(&bs->throttled_reqs[i]);
133 }
134}
135
136/* this function drain all the throttled IOs */
137static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
138{
139 bool drained = false;
140 bool enabled = bs->io_limits_enabled;
141 int i;
142
143 bs->io_limits_enabled = false;
144
145 for (i = 0; i < 2; i++) {
146 while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
147 drained = true;
148 }
149 }
150
151 bs->io_limits_enabled = enabled;
152
153 return drained;
154}
155
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800156void bdrv_io_limits_disable(BlockDriverState *bs)
157{
158 bs->io_limits_enabled = false;
159
Benoît Canetcc0681c2013-09-02 14:14:39 +0200160 bdrv_start_throttled_reqs(bs);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800161
Benoît Canetcc0681c2013-09-02 14:14:39 +0200162 throttle_destroy(&bs->throttle_state);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800163}
164
Benoît Canetcc0681c2013-09-02 14:14:39 +0200165static void bdrv_throttle_read_timer_cb(void *opaque)
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800166{
167 BlockDriverState *bs = opaque;
Benoît Canetcc0681c2013-09-02 14:14:39 +0200168 qemu_co_enter_next(&bs->throttled_reqs[0]);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800169}
170
Benoît Canetcc0681c2013-09-02 14:14:39 +0200171static void bdrv_throttle_write_timer_cb(void *opaque)
172{
173 BlockDriverState *bs = opaque;
174 qemu_co_enter_next(&bs->throttled_reqs[1]);
175}
176
177/* should be called before bdrv_set_io_limits if a limit is set */
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800178void bdrv_io_limits_enable(BlockDriverState *bs)
179{
Benoît Canetcc0681c2013-09-02 14:14:39 +0200180 assert(!bs->io_limits_enabled);
181 throttle_init(&bs->throttle_state,
182 QEMU_CLOCK_VIRTUAL,
183 bdrv_throttle_read_timer_cb,
184 bdrv_throttle_write_timer_cb,
185 bs);
Zhi Yong Wu0563e192011-11-03 16:57:25 +0800186 bs->io_limits_enabled = true;
187}
188
Benoît Canetcc0681c2013-09-02 14:14:39 +0200189/* This function makes an IO wait if needed
190 *
191 * @nb_sectors: the number of sectors of the IO
192 * @is_write: is the IO a write
193 */
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800194static void bdrv_io_limits_intercept(BlockDriverState *bs,
Kevin Wolfd5103582014-01-16 13:29:10 +0100195 unsigned int bytes,
Benoît Canetcc0681c2013-09-02 14:14:39 +0200196 bool is_write)
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800197{
Benoît Canetcc0681c2013-09-02 14:14:39 +0200198 /* does this io must wait */
199 bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800200
Benoît Canetcc0681c2013-09-02 14:14:39 +0200201 /* if must wait or any request of this type throttled queue the IO */
202 if (must_wait ||
203 !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
204 qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800205 }
206
Benoît Canetcc0681c2013-09-02 14:14:39 +0200207 /* the IO will be executed, do the accounting */
Kevin Wolfd5103582014-01-16 13:29:10 +0100208 throttle_account(&bs->throttle_state, is_write, bytes);
209
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800210
Benoît Canetcc0681c2013-09-02 14:14:39 +0200211 /* if the next request must wait -> do nothing */
212 if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
213 return;
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800214 }
215
Benoît Canetcc0681c2013-09-02 14:14:39 +0200216 /* else queue next request for execution */
217 qemu_co_queue_next(&bs->throttled_reqs[is_write]);
Zhi Yong Wu98f90db2011-11-08 13:00:14 +0800218}
219
Kevin Wolf339064d2013-11-28 10:23:32 +0100220size_t bdrv_opt_mem_align(BlockDriverState *bs)
221{
222 if (!bs || !bs->drv) {
223 /* 4k should be on the safe side */
224 return 4096;
225 }
226
227 return bs->bl.opt_mem_alignment;
228}
229
/*
 * Check if the path starts with "<protocol>:", i.e. whether a ':' appears
 * before any path separator.  Returns 1 if so, 0 otherwise.
 */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* on Windows a backslash also ends the candidate protocol name */
    static const char delimiters[] = ":/\\";

    /* drive specifications contain ':' but are not protocols */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* index of the first delimiter, or of the terminating NUL */
    return path[strcspn(path, delimiters)] == ':';
}
247
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A leading '/' makes a path absolute.  On Windows a leading backslash or
 * any drive specification recognised by is_windows_drive() or
 * is_windows_drive_prefix() does as well.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif
    return first == '/';
}
260
/*
 * Build in @dest (capacity @dest_size, always NUL-terminated when
 * @dest_size > 0) the path designated by @filename.
 *
 * If @filename is absolute it is copied as is.  Otherwise it is appended to
 * a prefix of @base_path: everything up to and including the last path
 * separator or the first ':' of @base_path, whichever reaches further.
 * This is how URL-like base paths are supported.
 *
 * Nothing is written when @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* just past the first ':' of base_path, or its start if there is none */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* just past the last separator of base_path, or its start */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep the longer of the two candidate prefixes */
    if (after_sep > after_colon) {
        after_colon = after_sep;
    }

    /* copy the prefix, truncated so the terminator always fits */
    prefix_len = after_colon - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';

    pstrcat(dest, dest_size, filename);
}
304
Paolo Bonzinidc5a1372012-05-08 16:51:50 +0200305void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
306{
307 if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
308 pstrcpy(dest, sz, bs->backing_file);
309 } else {
310 path_combine(dest, sz, bs->filename, bs->backing_file);
311 }
312}
313
Anthony Liguori5efa9d52009-05-09 17:03:42 -0500314void bdrv_register(BlockDriver *bdrv)
bellardea2384d2004-08-01 21:59:26 +0000315{
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +0100316 /* Block drivers without coroutine functions need emulation */
317 if (!bdrv->bdrv_co_readv) {
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200318 bdrv->bdrv_co_readv = bdrv_co_readv_em;
319 bdrv->bdrv_co_writev = bdrv_co_writev_em;
320
Stefan Hajnoczif8c35c12011-10-13 21:09:31 +0100321 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
322 * the block driver lacks aio we need to emulate that too.
323 */
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200324 if (!bdrv->bdrv_aio_readv) {
325 /* add AIO emulation layer */
326 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
327 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
Kevin Wolff9f05dc2011-07-15 13:50:26 +0200328 }
bellard83f64092006-08-01 16:21:11 +0000329 }
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +0200330
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100331 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
bellardea2384d2004-08-01 21:59:26 +0000332}
bellardb3380822004-03-14 21:38:54 +0000333
334/* create a new block device (by default it is empty) */
335BlockDriverState *bdrv_new(const char *device_name)
bellardfc01f7e2003-06-30 10:03:06 +0000336{
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +0100337 BlockDriverState *bs;
bellardb3380822004-03-14 21:38:54 +0000338
Anthony Liguori7267c092011-08-20 22:09:37 -0500339 bs = g_malloc0(sizeof(BlockDriverState));
Fam Zhenge4654d22013-11-13 18:29:43 +0800340 QLIST_INIT(&bs->dirty_bitmaps);
bellardb3380822004-03-14 21:38:54 +0000341 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
bellardea2384d2004-08-01 21:59:26 +0000342 if (device_name[0] != '\0') {
Benoît Canetdc364f42014-01-23 21:31:32 +0100343 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
bellardea2384d2004-08-01 21:59:26 +0000344 }
Luiz Capitulino28a72822011-09-26 17:43:50 -0300345 bdrv_iostatus_disable(bs);
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200346 notifier_list_init(&bs->close_notifiers);
Stefan Hajnoczid616b222013-06-24 17:13:10 +0200347 notifier_with_return_list_init(&bs->before_write_notifiers);
Benoît Canetcc0681c2013-09-02 14:14:39 +0200348 qemu_co_queue_init(&bs->throttled_reqs[0]);
349 qemu_co_queue_init(&bs->throttled_reqs[1]);
Fam Zheng9fcb0252013-08-23 09:14:46 +0800350 bs->refcnt = 1;
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200351
bellardb3380822004-03-14 21:38:54 +0000352 return bs;
353}
354
Paolo Bonzinid7d512f2012-08-23 11:20:36 +0200355void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
356{
357 notifier_list_add(&bs->close_notifiers, notify);
358}
359
bellardea2384d2004-08-01 21:59:26 +0000360BlockDriver *bdrv_find_format(const char *format_name)
361{
362 BlockDriver *drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100363 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
364 if (!strcmp(drv1->format_name, format_name)) {
bellardea2384d2004-08-01 21:59:26 +0000365 return drv1;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100366 }
bellardea2384d2004-08-01 21:59:26 +0000367 }
368 return NULL;
369}
370
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800371static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100372{
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800373 static const char *whitelist_rw[] = {
374 CONFIG_BDRV_RW_WHITELIST
375 };
376 static const char *whitelist_ro[] = {
377 CONFIG_BDRV_RO_WHITELIST
Markus Armbrustereb852012009-10-27 18:41:44 +0100378 };
379 const char **p;
380
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800381 if (!whitelist_rw[0] && !whitelist_ro[0]) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100382 return 1; /* no whitelist, anything goes */
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800383 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100384
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800385 for (p = whitelist_rw; *p; p++) {
Markus Armbrustereb852012009-10-27 18:41:44 +0100386 if (!strcmp(drv->format_name, *p)) {
387 return 1;
388 }
389 }
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800390 if (read_only) {
391 for (p = whitelist_ro; *p; p++) {
392 if (!strcmp(drv->format_name, *p)) {
393 return 1;
394 }
395 }
396 }
Markus Armbrustereb852012009-10-27 18:41:44 +0100397 return 0;
398}
399
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800400BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
401 bool read_only)
Markus Armbrustereb852012009-10-27 18:41:44 +0100402{
403 BlockDriver *drv = bdrv_find_format(format_name);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800404 return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
Markus Armbrustereb852012009-10-27 18:41:44 +0100405}
406
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800407typedef struct CreateCo {
408 BlockDriver *drv;
409 char *filename;
410 QEMUOptionParameter *options;
411 int ret;
Max Reitzcc84d902013-09-06 17:14:26 +0200412 Error *err;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800413} CreateCo;
414
415static void coroutine_fn bdrv_create_co_entry(void *opaque)
416{
Max Reitzcc84d902013-09-06 17:14:26 +0200417 Error *local_err = NULL;
418 int ret;
419
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800420 CreateCo *cco = opaque;
421 assert(cco->drv);
422
Max Reitzcc84d902013-09-06 17:14:26 +0200423 ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100424 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200425 error_propagate(&cco->err, local_err);
426 }
427 cco->ret = ret;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800428}
429
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200430int bdrv_create(BlockDriver *drv, const char* filename,
Max Reitzcc84d902013-09-06 17:14:26 +0200431 QEMUOptionParameter *options, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000432{
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800433 int ret;
Kevin Wolf0e7e1982009-05-18 16:42:10 +0200434
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800435 Coroutine *co;
436 CreateCo cco = {
437 .drv = drv,
438 .filename = g_strdup(filename),
439 .options = options,
440 .ret = NOT_DONE,
Max Reitzcc84d902013-09-06 17:14:26 +0200441 .err = NULL,
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800442 };
443
444 if (!drv->bdrv_create) {
Max Reitzcc84d902013-09-06 17:14:26 +0200445 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
Luiz Capitulino80168bf2012-10-17 16:45:25 -0300446 ret = -ENOTSUP;
447 goto out;
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800448 }
449
450 if (qemu_in_coroutine()) {
451 /* Fast-path if already in coroutine context */
452 bdrv_create_co_entry(&cco);
453 } else {
454 co = qemu_coroutine_create(bdrv_create_co_entry);
455 qemu_coroutine_enter(co, &cco);
456 while (cco.ret == NOT_DONE) {
457 qemu_aio_wait();
458 }
459 }
460
461 ret = cco.ret;
Max Reitzcc84d902013-09-06 17:14:26 +0200462 if (ret < 0) {
Markus Armbruster84d18f02014-01-30 15:07:28 +0100463 if (cco.err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200464 error_propagate(errp, cco.err);
465 } else {
466 error_setg_errno(errp, -ret, "Could not create image");
467 }
468 }
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800469
Luiz Capitulino80168bf2012-10-17 16:45:25 -0300470out:
471 g_free(cco.filename);
Zhi Yong Wu5b7e1542012-05-07 16:50:42 +0800472 return ret;
bellardea2384d2004-08-01 21:59:26 +0000473}
474
Max Reitzcc84d902013-09-06 17:14:26 +0200475int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
476 Error **errp)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200477{
478 BlockDriver *drv;
Max Reitzcc84d902013-09-06 17:14:26 +0200479 Error *local_err = NULL;
480 int ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200481
Kevin Wolf98289622013-07-10 15:47:39 +0200482 drv = bdrv_find_protocol(filename, true);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200483 if (drv == NULL) {
Max Reitzcc84d902013-09-06 17:14:26 +0200484 error_setg(errp, "Could not find protocol for file '%s'", filename);
Stefan Hajnoczi16905d72010-11-30 15:14:14 +0000485 return -ENOENT;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200486 }
487
Max Reitzcc84d902013-09-06 17:14:26 +0200488 ret = bdrv_create(drv, filename, options, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +0100489 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +0200490 error_propagate(errp, local_err);
491 }
492 return ret;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200493}
494
Kevin Wolf355ef4a2013-12-11 20:14:09 +0100495int bdrv_refresh_limits(BlockDriverState *bs)
Kevin Wolfd34682c2013-12-11 19:26:16 +0100496{
497 BlockDriver *drv = bs->drv;
498
499 memset(&bs->bl, 0, sizeof(bs->bl));
500
Kevin Wolf466ad822013-12-11 19:50:32 +0100501 if (!drv) {
502 return 0;
503 }
504
505 /* Take some limits from the children as a default */
506 if (bs->file) {
507 bdrv_refresh_limits(bs->file);
508 bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
Kevin Wolf339064d2013-11-28 10:23:32 +0100509 bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
510 } else {
511 bs->bl.opt_mem_alignment = 512;
Kevin Wolf466ad822013-12-11 19:50:32 +0100512 }
513
514 if (bs->backing_hd) {
515 bdrv_refresh_limits(bs->backing_hd);
516 bs->bl.opt_transfer_length =
517 MAX(bs->bl.opt_transfer_length,
518 bs->backing_hd->bl.opt_transfer_length);
Kevin Wolf339064d2013-11-28 10:23:32 +0100519 bs->bl.opt_mem_alignment =
520 MAX(bs->bl.opt_mem_alignment,
521 bs->backing_hd->bl.opt_mem_alignment);
Kevin Wolf466ad822013-12-11 19:50:32 +0100522 }
523
524 /* Then let the driver override it */
525 if (drv->bdrv_refresh_limits) {
Kevin Wolfd34682c2013-12-11 19:26:16 +0100526 return drv->bdrv_refresh_limits(bs);
527 }
528
529 return 0;
530}
531
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer that receives the name of the created file
 * @size:     capacity of @filename in bytes (on Windows it must be at
 *            least MAX_PATH)
 *
 * On non-Windows hosts the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW when
 * the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int len;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    /* a negative result is an output error; treat it like "does not fit" */
    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }

    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so capture close()'s error first */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }

    return 0;
#endif
}
bellardea2384d2004-08-01 21:59:26 +0000567
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200568/*
569 * Detect host devices. By convention, /dev/cdrom[N] is always
570 * recognized as a host CDROM.
571 */
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200572static BlockDriver *find_hdev_driver(const char *filename)
573{
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200574 int score_max = 0, score;
575 BlockDriver *drv = NULL, *d;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200576
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100577 QLIST_FOREACH(d, &bdrv_drivers, list) {
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200578 if (d->bdrv_probe_device) {
579 score = d->bdrv_probe_device(filename);
580 if (score > score_max) {
581 score_max = score;
582 drv = d;
583 }
584 }
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200585 }
586
Christoph Hellwig508c7cb2009-06-15 14:04:22 +0200587 return drv;
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200588}
Christoph Hellwigf3a5d3f2009-06-15 13:55:19 +0200589
Kevin Wolf98289622013-07-10 15:47:39 +0200590BlockDriver *bdrv_find_protocol(const char *filename,
591 bool allow_protocol_prefix)
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200592{
593 BlockDriver *drv1;
594 char protocol[128];
595 int len;
596 const char *p;
597
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200598 /* TODO Drivers without bdrv_file_open must be specified explicitly */
599
Christoph Hellwig39508e72010-06-23 12:25:17 +0200600 /*
601 * XXX(hch): we really should not let host device detection
602 * override an explicit protocol specification, but moving this
603 * later breaks access to device names with colons in them.
604 * Thanks to the brain-dead persistent naming schemes on udev-
605 * based Linux systems those actually are quite common.
606 */
607 drv1 = find_hdev_driver(filename);
608 if (drv1) {
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200609 return drv1;
610 }
Christoph Hellwig39508e72010-06-23 12:25:17 +0200611
Kevin Wolf98289622013-07-10 15:47:39 +0200612 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
Christoph Hellwig39508e72010-06-23 12:25:17 +0200613 return bdrv_find_format("file");
614 }
Kevin Wolf98289622013-07-10 15:47:39 +0200615
Stefan Hajnoczi9e0b22f2010-12-09 11:53:00 +0000616 p = strchr(filename, ':');
617 assert(p != NULL);
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200618 len = p - filename;
619 if (len > sizeof(protocol) - 1)
620 len = sizeof(protocol) - 1;
621 memcpy(protocol, filename, len);
622 protocol[len] = '\0';
623 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
624 if (drv1->protocol_name &&
625 !strcmp(drv1->protocol_name, protocol)) {
626 return drv1;
627 }
628 }
629 return NULL;
630}
631
Kevin Wolff500a6d2012-11-12 17:35:27 +0100632static int find_image_format(BlockDriverState *bs, const char *filename,
Max Reitz34b5d2c2013-09-05 14:45:29 +0200633 BlockDriver **pdrv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +0000634{
Kevin Wolff500a6d2012-11-12 17:35:27 +0100635 int score, score_max;
bellardea2384d2004-08-01 21:59:26 +0000636 BlockDriver *drv1, *drv;
bellard83f64092006-08-01 16:21:11 +0000637 uint8_t buf[2048];
Kevin Wolff500a6d2012-11-12 17:35:27 +0100638 int ret = 0;
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700639
Kevin Wolf08a00552010-06-01 18:37:31 +0200640 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
Paolo Bonzini8e895592013-01-10 15:39:27 +0100641 if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
Stefan Weilc98ac352010-07-21 21:51:51 +0200642 drv = bdrv_find_format("raw");
643 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200644 error_setg(errp, "Could not find raw image format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200645 ret = -ENOENT;
646 }
647 *pdrv = drv;
648 return ret;
Nicholas A. Bellinger1a396852010-05-27 08:56:28 -0700649 }
Nicholas Bellingerf8ea0b02010-05-17 09:45:57 -0700650
bellard83f64092006-08-01 16:21:11 +0000651 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
bellard83f64092006-08-01 16:21:11 +0000652 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200653 error_setg_errno(errp, -ret, "Could not read image for determining its "
654 "format");
Stefan Weilc98ac352010-07-21 21:51:51 +0200655 *pdrv = NULL;
656 return ret;
bellard83f64092006-08-01 16:21:11 +0000657 }
658
bellardea2384d2004-08-01 21:59:26 +0000659 score_max = 0;
Christoph Hellwig84a12e62010-04-07 22:30:24 +0200660 drv = NULL;
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +0100661 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
bellard83f64092006-08-01 16:21:11 +0000662 if (drv1->bdrv_probe) {
663 score = drv1->bdrv_probe(buf, ret, filename);
664 if (score > score_max) {
665 score_max = score;
666 drv = drv1;
667 }
bellardea2384d2004-08-01 21:59:26 +0000668 }
669 }
Stefan Weilc98ac352010-07-21 21:51:51 +0200670 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200671 error_setg(errp, "Could not determine image format: No compatible "
672 "driver found");
Stefan Weilc98ac352010-07-21 21:51:51 +0200673 ret = -ENOENT;
674 }
675 *pdrv = drv;
676 return ret;
bellardea2384d2004-08-01 21:59:26 +0000677}
678
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100679/**
680 * Set the current 'total_sectors' value
681 */
682static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
683{
684 BlockDriver *drv = bs->drv;
685
Nicholas Bellinger396759a2010-05-17 09:46:04 -0700686 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
687 if (bs->sg)
688 return 0;
689
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100690 /* query actual device if possible, otherwise just trust the hint */
691 if (drv->bdrv_getlength) {
692 int64_t length = drv->bdrv_getlength(bs);
693 if (length < 0) {
694 return length;
695 }
Fam Zheng7e382002013-11-06 19:48:06 +0800696 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100697 }
698
699 bs->total_sectors = hint;
700 return 0;
701}
702
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100703/**
Paolo Bonzini9e8f1832013-02-08 14:06:11 +0100704 * Set open flags for a given discard mode
705 *
706 * Return 0 on success, -1 if the discard mode was invalid.
707 */
708int bdrv_parse_discard_flags(const char *mode, int *flags)
709{
710 *flags &= ~BDRV_O_UNMAP;
711
712 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
713 /* do nothing */
714 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
715 *flags |= BDRV_O_UNMAP;
716 } else {
717 return -1;
718 }
719
720 return 0;
721}
722
723/**
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100724 * Set open flags for a given cache mode
725 *
726 * Return 0 on success, -1 if the cache mode was invalid.
727 */
728int bdrv_parse_cache_flags(const char *mode, int *flags)
729{
730 *flags &= ~BDRV_O_CACHE_MASK;
731
732 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
733 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
Stefan Hajnoczi92196b22011-08-04 12:26:52 +0100734 } else if (!strcmp(mode, "directsync")) {
735 *flags |= BDRV_O_NOCACHE;
Stefan Hajnoczic3993cd2011-08-04 12:26:51 +0100736 } else if (!strcmp(mode, "writeback")) {
737 *flags |= BDRV_O_CACHE_WB;
738 } else if (!strcmp(mode, "unsafe")) {
739 *flags |= BDRV_O_CACHE_WB;
740 *flags |= BDRV_O_NO_FLUSH;
741 } else if (!strcmp(mode, "writethrough")) {
742 /* this is the default */
743 } else {
744 return -1;
745 }
746
747 return 0;
748}
749
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000750/**
751 * The copy-on-read flag is actually a reference count so multiple users may
752 * use the feature without worrying about clobbering its previous state.
753 * Copy-on-read stays enabled until all users have called to disable it.
754 */
755void bdrv_enable_copy_on_read(BlockDriverState *bs)
756{
757 bs->copy_on_read++;
758}
759
760void bdrv_disable_copy_on_read(BlockDriverState *bs)
761{
762 assert(bs->copy_on_read > 0);
763 bs->copy_on_read--;
764}
765
Kevin Wolf7b272452012-11-12 17:05:39 +0100766static int bdrv_open_flags(BlockDriverState *bs, int flags)
767{
768 int open_flags = flags | BDRV_O_CACHE_WB;
769
770 /*
771 * Clear flags that are internal to the block layer before opening the
772 * image.
773 */
774 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
775
776 /*
777 * Snapshots should be writable.
778 */
779 if (bs->is_temporary) {
780 open_flags |= BDRV_O_RDWR;
781 }
782
783 return open_flags;
784}
785
Benoît Canet6913c0c2014-01-23 21:31:33 +0100786static int bdrv_assign_node_name(BlockDriverState *bs,
787 const char *node_name,
788 Error **errp)
789{
790 if (!node_name) {
791 return 0;
792 }
793
794 /* empty string node name is invalid */
795 if (node_name[0] == '\0') {
796 error_setg(errp, "Empty node name");
797 return -EINVAL;
798 }
799
Benoît Canet0c5e94e2014-02-12 17:15:07 +0100800 /* takes care of avoiding namespaces collisions */
801 if (bdrv_find(node_name)) {
802 error_setg(errp, "node-name=%s is conflicting with a device id",
803 node_name);
804 return -EINVAL;
805 }
806
Benoît Canet6913c0c2014-01-23 21:31:33 +0100807 /* takes care of avoiding duplicates node names */
808 if (bdrv_find_node(node_name)) {
809 error_setg(errp, "Duplicate node name");
810 return -EINVAL;
811 }
812
813 /* copy node name into the bs and insert it into the graph list */
814 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
815 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
816
817 return 0;
818}
819
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200820/*
Kevin Wolf57915332010-04-14 15:24:50 +0200821 * Common part for opening disk images and files
Kevin Wolfb6ad4912013-03-15 10:35:04 +0100822 *
823 * Removes all processed options from *options.
Kevin Wolf57915332010-04-14 15:24:50 +0200824 */
Kevin Wolff500a6d2012-11-12 17:35:27 +0100825static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
Max Reitz34b5d2c2013-09-05 14:45:29 +0200826 QDict *options, int flags, BlockDriver *drv, Error **errp)
Kevin Wolf57915332010-04-14 15:24:50 +0200827{
828 int ret, open_flags;
Kevin Wolf035fccd2013-04-09 14:34:19 +0200829 const char *filename;
Benoît Canet6913c0c2014-01-23 21:31:33 +0100830 const char *node_name = NULL;
Max Reitz34b5d2c2013-09-05 14:45:29 +0200831 Error *local_err = NULL;
Kevin Wolf57915332010-04-14 15:24:50 +0200832
833 assert(drv != NULL);
Paolo Bonzini64058752012-05-08 16:51:49 +0200834 assert(bs->file == NULL);
Kevin Wolf707ff822013-03-06 12:20:31 +0100835 assert(options != NULL && bs->options != options);
Kevin Wolf57915332010-04-14 15:24:50 +0200836
Kevin Wolf45673672013-04-22 17:48:40 +0200837 if (file != NULL) {
838 filename = file->filename;
839 } else {
840 filename = qdict_get_try_str(options, "filename");
841 }
842
Kevin Wolf765003d2014-02-03 14:49:42 +0100843 if (drv->bdrv_needs_filename && !filename) {
844 error_setg(errp, "The '%s' block driver requires a file name",
845 drv->format_name);
846 return -EINVAL;
847 }
848
Kevin Wolf45673672013-04-22 17:48:40 +0200849 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
Stefan Hajnoczi28dcee12011-09-22 20:14:12 +0100850
Benoît Canet6913c0c2014-01-23 21:31:33 +0100851 node_name = qdict_get_try_str(options, "node-name");
852 ret = bdrv_assign_node_name(bs, node_name, errp);
853 if (ret < 0) {
854 return ret;
855 }
856 qdict_del(options, "node-name");
857
Kevin Wolf5d186eb2013-03-27 17:28:18 +0100858 /* bdrv_open() with directly using a protocol as drv. This layer is already
859 * opened, so assign it to bs (while file becomes a closed BlockDriverState)
860 * and return immediately. */
861 if (file != NULL && drv->bdrv_file_open) {
862 bdrv_swap(file, bs);
863 return 0;
864 }
865
Kevin Wolf57915332010-04-14 15:24:50 +0200866 bs->open_flags = flags;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +0100867 bs->guest_block_size = 512;
Paolo Bonzinic25f53b2011-11-29 12:42:20 +0100868 bs->request_alignment = 512;
Asias He0d51b4d2013-08-22 15:24:14 +0800869 bs->zero_beyond_eof = true;
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800870 open_flags = bdrv_open_flags(bs, flags);
871 bs->read_only = !(open_flags & BDRV_O_RDWR);
872
873 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
Kevin Wolf8f94a6e2013-10-10 11:45:55 +0200874 error_setg(errp,
875 !bs->read_only && bdrv_is_whitelisted(drv, true)
876 ? "Driver '%s' can only be used for read-only devices"
877 : "Driver '%s' is not whitelisted",
878 drv->format_name);
Fam Zhengb64ec4e2013-05-29 19:35:40 +0800879 return -ENOTSUP;
880 }
Kevin Wolf57915332010-04-14 15:24:50 +0200881
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000882 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
Kevin Wolf0ebd24e2013-09-19 15:12:18 +0200883 if (flags & BDRV_O_COPY_ON_READ) {
884 if (!bs->read_only) {
885 bdrv_enable_copy_on_read(bs);
886 } else {
887 error_setg(errp, "Can't use copy-on-read on read-only device");
888 return -EINVAL;
889 }
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +0000890 }
891
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100892 if (filename != NULL) {
893 pstrcpy(bs->filename, sizeof(bs->filename), filename);
894 } else {
895 bs->filename[0] = '\0';
896 }
Kevin Wolf57915332010-04-14 15:24:50 +0200897
Kevin Wolf57915332010-04-14 15:24:50 +0200898 bs->drv = drv;
Anthony Liguori7267c092011-08-20 22:09:37 -0500899 bs->opaque = g_malloc0(drv->instance_size);
Kevin Wolf57915332010-04-14 15:24:50 +0200900
Stefan Hajnoczi03f541b2011-10-27 10:54:28 +0100901 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
Stefan Hajnoczie7c63792011-10-27 10:54:27 +0100902
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200903 /* Open the image, either directly or using a protocol */
904 if (drv->bdrv_file_open) {
Kevin Wolf5d186eb2013-03-27 17:28:18 +0100905 assert(file == NULL);
Benoît Canet030be322013-09-24 17:07:04 +0200906 assert(!drv->bdrv_needs_filename || filename != NULL);
Max Reitz34b5d2c2013-09-05 14:45:29 +0200907 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
Kevin Wolff500a6d2012-11-12 17:35:27 +0100908 } else {
Kevin Wolf2af5ef72013-04-09 13:19:18 +0200909 if (file == NULL) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200910 error_setg(errp, "Can't use '%s' as a block driver for the "
911 "protocol level", drv->format_name);
Kevin Wolf2af5ef72013-04-09 13:19:18 +0200912 ret = -EINVAL;
913 goto free_and_fail;
914 }
Kevin Wolff500a6d2012-11-12 17:35:27 +0100915 bs->file = file;
Max Reitz34b5d2c2013-09-05 14:45:29 +0200916 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
Kevin Wolf66f82ce2010-04-14 14:17:38 +0200917 }
918
Kevin Wolf57915332010-04-14 15:24:50 +0200919 if (ret < 0) {
Markus Armbruster84d18f02014-01-30 15:07:28 +0100920 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200921 error_propagate(errp, local_err);
Dunrong Huang2fa9aa52013-09-24 18:14:01 +0800922 } else if (bs->filename[0]) {
923 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
Max Reitz34b5d2c2013-09-05 14:45:29 +0200924 } else {
925 error_setg_errno(errp, -ret, "Could not open image");
926 }
Kevin Wolf57915332010-04-14 15:24:50 +0200927 goto free_and_fail;
928 }
929
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100930 ret = refresh_total_sectors(bs, bs->total_sectors);
931 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200932 error_setg_errno(errp, -ret, "Could not refresh total sector count");
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100933 goto free_and_fail;
Kevin Wolf57915332010-04-14 15:24:50 +0200934 }
Stefan Hajnoczi51762282010-04-19 16:56:41 +0100935
Kevin Wolfd34682c2013-12-11 19:26:16 +0100936 bdrv_refresh_limits(bs);
Paolo Bonzinic25f53b2011-11-29 12:42:20 +0100937 assert(bdrv_opt_mem_align(bs) != 0);
Kevin Wolf47ea2de2014-03-05 15:49:55 +0100938 assert((bs->request_alignment != 0) || bs->sg);
Kevin Wolfd34682c2013-12-11 19:26:16 +0100939
Kevin Wolf57915332010-04-14 15:24:50 +0200940#ifndef _WIN32
941 if (bs->is_temporary) {
Dunrong Huangd4cea8d2013-10-03 01:31:27 +0800942 assert(bs->filename[0] != '\0');
943 unlink(bs->filename);
Kevin Wolf57915332010-04-14 15:24:50 +0200944 }
945#endif
946 return 0;
947
948free_and_fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +0100949 bs->file = NULL;
Anthony Liguori7267c092011-08-20 22:09:37 -0500950 g_free(bs->opaque);
Kevin Wolf57915332010-04-14 15:24:50 +0200951 bs->opaque = NULL;
952 bs->drv = NULL;
953 return ret;
954}
955
956/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200957 * Opens a file using a protocol (file, host_device, nbd, ...)
Kevin Wolf787e4a82013-03-06 11:52:48 +0100958 *
Max Reitz5acd9d82014-02-18 18:33:11 +0100959 * options is an indirect pointer to a QDict of options to pass to the block
960 * drivers, or pointer to NULL for an empty set of options. If this function
961 * takes ownership of the QDict reference, it will set *options to NULL;
962 * otherwise, it will contain unused/unrecognized options after this function
963 * returns. Then, the caller is responsible for freeing it. If it intends to
964 * reuse the QDict, QINCREF() should be called beforehand.
Kevin Wolfb6ce07a2010-04-12 16:37:13 +0200965 */
Max Reitzd4446ea2014-02-18 18:33:09 +0100966static int bdrv_file_open(BlockDriverState *bs, const char *filename,
Max Reitz5acd9d82014-02-18 18:33:11 +0100967 QDict **options, int flags, Error **errp)
bellardb3380822004-03-14 21:38:54 +0000968{
Christoph Hellwig6db95602010-04-05 16:53:57 +0200969 BlockDriver *drv;
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100970 const char *drvname;
Kevin Wolfe3fa4bf2014-04-03 12:45:51 +0200971 bool parse_filename = false;
Max Reitz34b5d2c2013-09-05 14:45:29 +0200972 Error *local_err = NULL;
bellard83f64092006-08-01 16:21:11 +0000973 int ret;
974
Kevin Wolf035fccd2013-04-09 14:34:19 +0200975 /* Fetch the file name from the options QDict if necessary */
976 if (!filename) {
Max Reitz5acd9d82014-02-18 18:33:11 +0100977 filename = qdict_get_try_str(*options, "filename");
978 } else if (filename && !qdict_haskey(*options, "filename")) {
979 qdict_put(*options, "filename", qstring_from_str(filename));
Kevin Wolfe3fa4bf2014-04-03 12:45:51 +0200980 parse_filename = true;
Kevin Wolf035fccd2013-04-09 14:34:19 +0200981 } else {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200982 error_setg(errp, "Can't specify 'file' and 'filename' options at the "
983 "same time");
Kevin Wolf035fccd2013-04-09 14:34:19 +0200984 ret = -EINVAL;
985 goto fail;
986 }
987
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100988 /* Find the right block driver */
Max Reitz5acd9d82014-02-18 18:33:11 +0100989 drvname = qdict_get_try_str(*options, "driver");
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100990 if (drvname) {
Kevin Wolf8f94a6e2013-10-10 11:45:55 +0200991 drv = bdrv_find_format(drvname);
Max Reitz34b5d2c2013-09-05 14:45:29 +0200992 if (!drv) {
993 error_setg(errp, "Unknown driver '%s'", drvname);
994 }
Max Reitz5acd9d82014-02-18 18:33:11 +0100995 qdict_del(*options, "driver");
Kevin Wolfc2ad1b02013-03-18 16:40:51 +0100996 } else if (filename) {
Kevin Wolfe3fa4bf2014-04-03 12:45:51 +0200997 drv = bdrv_find_protocol(filename, parse_filename);
Kevin Wolf98289622013-07-10 15:47:39 +0200998 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +0200999 error_setg(errp, "Unknown protocol");
Kevin Wolf98289622013-07-10 15:47:39 +02001000 }
Kevin Wolfc2ad1b02013-03-18 16:40:51 +01001001 } else {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001002 error_setg(errp, "Must specify either driver or file");
Kevin Wolfc2ad1b02013-03-18 16:40:51 +01001003 drv = NULL;
1004 }
1005
1006 if (!drv) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001007 /* errp has been set already */
Kevin Wolfc2ad1b02013-03-18 16:40:51 +01001008 ret = -ENOENT;
1009 goto fail;
1010 }
1011
1012 /* Parse the filename and open it */
Kevin Wolfe3fa4bf2014-04-03 12:45:51 +02001013 if (drv->bdrv_parse_filename && parse_filename) {
Max Reitz5acd9d82014-02-18 18:33:11 +01001014 drv->bdrv_parse_filename(filename, *options, &local_err);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001015 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001016 error_propagate(errp, local_err);
Kevin Wolf6963a302013-03-15 18:47:22 +01001017 ret = -EINVAL;
1018 goto fail;
1019 }
Max Reitzcd5d0312014-03-05 22:41:36 +01001020
1021 if (!drv->bdrv_needs_filename) {
1022 qdict_del(*options, "filename");
1023 } else {
1024 filename = qdict_get_str(*options, "filename");
1025 }
Kevin Wolf6963a302013-03-15 18:47:22 +01001026 }
1027
Max Reitz505d7582013-12-20 19:28:13 +01001028 if (!drv->bdrv_file_open) {
Max Reitz5acd9d82014-02-18 18:33:11 +01001029 ret = bdrv_open(&bs, filename, NULL, *options, flags, drv, &local_err);
1030 *options = NULL;
Max Reitz505d7582013-12-20 19:28:13 +01001031 } else {
Max Reitz5acd9d82014-02-18 18:33:11 +01001032 ret = bdrv_open_common(bs, NULL, *options, flags, drv, &local_err);
Max Reitz505d7582013-12-20 19:28:13 +01001033 }
Kevin Wolf707ff822013-03-06 12:20:31 +01001034 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001035 error_propagate(errp, local_err);
Kevin Wolf707ff822013-03-06 12:20:31 +01001036 goto fail;
1037 }
1038
aliguori71d07702009-03-03 17:37:16 +00001039 bs->growable = 1;
bellard83f64092006-08-01 16:21:11 +00001040 return 0;
Kevin Wolf707ff822013-03-06 12:20:31 +01001041
1042fail:
Kevin Wolf707ff822013-03-06 12:20:31 +01001043 return ret;
bellardea2384d2004-08-01 21:59:26 +00001044}
bellardfc01f7e2003-06-30 10:03:06 +00001045
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001046/*
1047 * Opens the backing file for a BlockDriverState if not yet open
1048 *
1049 * options is a QDict of options to pass to the block drivers, or NULL for an
1050 * empty set of options. The reference to the QDict is transferred to this
1051 * function (even on failure), so if the caller intends to reuse the dictionary,
1052 * it needs to use QINCREF() before calling bdrv_file_open.
1053 */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001054int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
Paolo Bonzini9156df12012-10-18 16:49:17 +02001055{
1056 char backing_filename[PATH_MAX];
1057 int back_flags, ret;
1058 BlockDriver *back_drv = NULL;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001059 Error *local_err = NULL;
Paolo Bonzini9156df12012-10-18 16:49:17 +02001060
1061 if (bs->backing_hd != NULL) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001062 QDECREF(options);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001063 return 0;
1064 }
1065
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001066 /* NULL means an empty set of options */
1067 if (options == NULL) {
1068 options = qdict_new();
1069 }
1070
Paolo Bonzini9156df12012-10-18 16:49:17 +02001071 bs->open_flags &= ~BDRV_O_NO_BACKING;
Kevin Wolf1cb6f502013-04-12 20:27:07 +02001072 if (qdict_haskey(options, "file.filename")) {
1073 backing_filename[0] = '\0';
1074 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001075 QDECREF(options);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001076 return 0;
Fam Zhengdbecebd2013-09-22 20:05:06 +08001077 } else {
1078 bdrv_get_full_backing_filename(bs, backing_filename,
1079 sizeof(backing_filename));
Paolo Bonzini9156df12012-10-18 16:49:17 +02001080 }
1081
Paolo Bonzini9156df12012-10-18 16:49:17 +02001082 if (bs->backing_format[0] != '\0') {
1083 back_drv = bdrv_find_format(bs->backing_format);
1084 }
1085
1086 /* backing files always opened read-only */
Thibaut LAURENT87a5deb2013-10-25 02:15:07 +02001087 back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT |
1088 BDRV_O_COPY_ON_READ);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001089
Max Reitzf67503e2014-02-18 18:33:05 +01001090 assert(bs->backing_hd == NULL);
1091 ret = bdrv_open(&bs->backing_hd,
Max Reitzddf56362014-02-18 18:33:06 +01001092 *backing_filename ? backing_filename : NULL, NULL, options,
Max Reitz34b5d2c2013-09-05 14:45:29 +02001093 back_flags, back_drv, &local_err);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001094 if (ret < 0) {
Paolo Bonzini9156df12012-10-18 16:49:17 +02001095 bs->backing_hd = NULL;
1096 bs->open_flags |= BDRV_O_NO_BACKING;
Fam Zhengb04b6b62013-11-08 11:26:49 +08001097 error_setg(errp, "Could not open backing file: %s",
1098 error_get_pretty(local_err));
1099 error_free(local_err);
Paolo Bonzini9156df12012-10-18 16:49:17 +02001100 return ret;
1101 }
Peter Feinerd80ac652014-01-08 19:43:25 +00001102
1103 if (bs->backing_hd->file) {
1104 pstrcpy(bs->backing_file, sizeof(bs->backing_file),
1105 bs->backing_hd->file->filename);
1106 }
1107
Kevin Wolfd34682c2013-12-11 19:26:16 +01001108 /* Recalculate the BlockLimits with the backing file */
1109 bdrv_refresh_limits(bs);
1110
Paolo Bonzini9156df12012-10-18 16:49:17 +02001111 return 0;
1112}
1113
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001114/*
Max Reitzda557aa2013-12-20 19:28:11 +01001115 * Opens a disk image whose options are given as BlockdevRef in another block
1116 * device's options.
1117 *
Max Reitzda557aa2013-12-20 19:28:11 +01001118 * If allow_none is true, no image will be opened if filename is false and no
1119 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1120 *
1121 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1122 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1123 * itself, all options starting with "${bdref_key}." are considered part of the
1124 * BlockdevRef.
1125 *
1126 * The BlockdevRef will be removed from the options QDict.
Max Reitzf67503e2014-02-18 18:33:05 +01001127 *
1128 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
Max Reitzda557aa2013-12-20 19:28:11 +01001129 */
1130int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1131 QDict *options, const char *bdref_key, int flags,
Max Reitzf7d9fd82014-02-18 18:33:12 +01001132 bool allow_none, Error **errp)
Max Reitzda557aa2013-12-20 19:28:11 +01001133{
1134 QDict *image_options;
1135 int ret;
1136 char *bdref_key_dot;
1137 const char *reference;
1138
Max Reitzf67503e2014-02-18 18:33:05 +01001139 assert(pbs);
1140 assert(*pbs == NULL);
1141
Max Reitzda557aa2013-12-20 19:28:11 +01001142 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1143 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1144 g_free(bdref_key_dot);
1145
1146 reference = qdict_get_try_str(options, bdref_key);
1147 if (!filename && !reference && !qdict_size(image_options)) {
1148 if (allow_none) {
1149 ret = 0;
1150 } else {
1151 error_setg(errp, "A block device must be specified for \"%s\"",
1152 bdref_key);
1153 ret = -EINVAL;
1154 }
1155 goto done;
1156 }
1157
Max Reitzf7d9fd82014-02-18 18:33:12 +01001158 ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
Max Reitzda557aa2013-12-20 19:28:11 +01001159
1160done:
1161 qdict_del(options, bdref_key);
1162 return ret;
1163}
1164
1165/*
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001166 * Opens a disk image (raw, qcow2, vmdk, ...)
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001167 *
1168 * options is a QDict of options to pass to the block drivers, or NULL for an
1169 * empty set of options. The reference to the QDict belongs to the block layer
1170 * after the call (even on failure), so if the caller intends to reuse the
1171 * dictionary, it needs to use QINCREF() before calling bdrv_open.
Max Reitzf67503e2014-02-18 18:33:05 +01001172 *
1173 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1174 * If it is not NULL, the referenced BDS will be reused.
Max Reitzddf56362014-02-18 18:33:06 +01001175 *
1176 * The reference parameter may be used to specify an existing block device which
1177 * should be opened. If specified, neither options nor a filename may be given,
1178 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001179 */
Max Reitzddf56362014-02-18 18:33:06 +01001180int bdrv_open(BlockDriverState **pbs, const char *filename,
1181 const char *reference, QDict *options, int flags,
1182 BlockDriver *drv, Error **errp)
bellardea2384d2004-08-01 21:59:26 +00001183{
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001184 int ret;
Stefan Weil89c9bc32012-11-22 07:25:48 +01001185 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1186 char tmp_filename[PATH_MAX + 1];
Max Reitzf67503e2014-02-18 18:33:05 +01001187 BlockDriverState *file = NULL, *bs;
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001188 const char *drvname;
Max Reitz34b5d2c2013-09-05 14:45:29 +02001189 Error *local_err = NULL;
bellard712e7872005-04-28 21:09:32 +00001190
Max Reitzf67503e2014-02-18 18:33:05 +01001191 assert(pbs);
1192
Max Reitzddf56362014-02-18 18:33:06 +01001193 if (reference) {
1194 bool options_non_empty = options ? qdict_size(options) : false;
1195 QDECREF(options);
1196
1197 if (*pbs) {
1198 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1199 "another block device");
1200 return -EINVAL;
1201 }
1202
1203 if (filename || options_non_empty) {
1204 error_setg(errp, "Cannot reference an existing block device with "
1205 "additional options or a new filename");
1206 return -EINVAL;
1207 }
1208
1209 bs = bdrv_lookup_bs(reference, reference, errp);
1210 if (!bs) {
1211 return -ENODEV;
1212 }
1213 bdrv_ref(bs);
1214 *pbs = bs;
1215 return 0;
1216 }
1217
Max Reitzf67503e2014-02-18 18:33:05 +01001218 if (*pbs) {
1219 bs = *pbs;
1220 } else {
1221 bs = bdrv_new("");
1222 }
1223
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001224 /* NULL means an empty set of options */
1225 if (options == NULL) {
1226 options = qdict_new();
1227 }
1228
1229 bs->options = options;
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001230 options = qdict_clone_shallow(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001231
Max Reitz5469a2a2014-02-18 18:33:10 +01001232 if (flags & BDRV_O_PROTOCOL) {
1233 assert(!drv);
Max Reitz5acd9d82014-02-18 18:33:11 +01001234 ret = bdrv_file_open(bs, filename, &options, flags & ~BDRV_O_PROTOCOL,
Max Reitz5469a2a2014-02-18 18:33:10 +01001235 &local_err);
Max Reitz5469a2a2014-02-18 18:33:10 +01001236 if (!ret) {
Kevin Wolfeb909c72014-03-06 16:34:46 +01001237 drv = bs->drv;
Max Reitz5acd9d82014-02-18 18:33:11 +01001238 goto done;
Max Reitz5469a2a2014-02-18 18:33:10 +01001239 } else if (bs->drv) {
1240 goto close_and_fail;
1241 } else {
1242 goto fail;
1243 }
1244 }
1245
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001246 /* For snapshot=on, create a temporary qcow2 overlay */
bellard83f64092006-08-01 16:21:11 +00001247 if (flags & BDRV_O_SNAPSHOT) {
bellardea2384d2004-08-01 21:59:26 +00001248 BlockDriverState *bs1;
1249 int64_t total_size;
Kevin Wolf91a073a2009-05-27 14:48:06 +02001250 BlockDriver *bdrv_qcow2;
Kevin Wolf08b392e2013-03-18 16:17:44 +01001251 QEMUOptionParameter *create_options;
Kevin Wolf9fd31712013-11-14 15:37:12 +01001252 QDict *snapshot_options;
Kevin Wolfc2ad1b02013-03-18 16:40:51 +01001253
bellardea2384d2004-08-01 21:59:26 +00001254 /* if snapshot, we create a temporary backing file and open it
1255 instead of opening 'filename' directly */
1256
Kevin Wolf9fd31712013-11-14 15:37:12 +01001257 /* Get the required size from the image */
Kevin Wolf9fd31712013-11-14 15:37:12 +01001258 QINCREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001259 bs1 = NULL;
Max Reitzddf56362014-02-18 18:33:06 +01001260 ret = bdrv_open(&bs1, filename, NULL, options, BDRV_O_NO_BACKING,
Kevin Wolfc9fbb992013-11-28 11:58:02 +01001261 drv, &local_err);
aliguori51d7c002009-03-05 23:00:29 +00001262 if (ret < 0) {
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001263 goto fail;
bellardea2384d2004-08-01 21:59:26 +00001264 }
Jes Sorensen3e829902010-05-27 16:20:30 +02001265 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
aliguori7c96d462008-09-12 17:54:13 +00001266
Fam Zheng4f6fd342013-08-23 09:14:47 +08001267 bdrv_unref(bs1);
ths3b46e622007-09-17 08:09:54 +00001268
Kevin Wolf9fd31712013-11-14 15:37:12 +01001269 /* Create the temporary image */
Jim Meyeringeba25052012-05-28 09:27:54 +02001270 ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
1271 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001272 error_setg_errno(errp, -ret, "Could not get temporary filename");
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001273 goto fail;
Jim Meyeringeba25052012-05-28 09:27:54 +02001274 }
aliguori7c96d462008-09-12 17:54:13 +00001275
Kevin Wolf91a073a2009-05-27 14:48:06 +02001276 bdrv_qcow2 = bdrv_find_format("qcow2");
Kevin Wolf08b392e2013-03-18 16:17:44 +01001277 create_options = parse_option_parameters("", bdrv_qcow2->create_options,
1278 NULL);
Kevin Wolf91a073a2009-05-27 14:48:06 +02001279
Kevin Wolf08b392e2013-03-18 16:17:44 +01001280 set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
Kevin Wolf91a073a2009-05-27 14:48:06 +02001281
Max Reitzcc84d902013-09-06 17:14:26 +02001282 ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err);
Kevin Wolf08b392e2013-03-18 16:17:44 +01001283 free_option_parameters(create_options);
aliguori51d7c002009-03-05 23:00:29 +00001284 if (ret < 0) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001285 error_setg_errno(errp, -ret, "Could not create temporary overlay "
Max Reitzcc84d902013-09-06 17:14:26 +02001286 "'%s': %s", tmp_filename,
1287 error_get_pretty(local_err));
1288 error_free(local_err);
1289 local_err = NULL;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001290 goto fail;
bellardea2384d2004-08-01 21:59:26 +00001291 }
Kevin Wolf91a073a2009-05-27 14:48:06 +02001292
Kevin Wolf9fd31712013-11-14 15:37:12 +01001293 /* Prepare a new options QDict for the temporary file, where user
1294 * options refer to the backing file */
1295 if (filename) {
1296 qdict_put(options, "file.filename", qstring_from_str(filename));
1297 }
1298 if (drv) {
1299 qdict_put(options, "driver", qstring_from_str(drv->format_name));
1300 }
1301
1302 snapshot_options = qdict_new();
1303 qdict_put(snapshot_options, "backing", options);
1304 qdict_flatten(snapshot_options);
1305
1306 bs->options = snapshot_options;
1307 options = qdict_clone_shallow(bs->options);
1308
bellardea2384d2004-08-01 21:59:26 +00001309 filename = tmp_filename;
Kevin Wolf91a073a2009-05-27 14:48:06 +02001310 drv = bdrv_qcow2;
bellardea2384d2004-08-01 21:59:26 +00001311 bs->is_temporary = 1;
1312 }
bellard712e7872005-04-28 21:09:32 +00001313
Kevin Wolff500a6d2012-11-12 17:35:27 +01001314 /* Open image file without format layer */
Jeff Codybe028ad2012-09-20 15:13:17 -04001315 if (flags & BDRV_O_RDWR) {
1316 flags |= BDRV_O_ALLOW_RDWR;
1317 }
1318
Max Reitzf67503e2014-02-18 18:33:05 +01001319 assert(file == NULL);
Max Reitz054963f2013-12-20 19:28:12 +01001320 ret = bdrv_open_image(&file, filename, options, "file",
Max Reitzf7d9fd82014-02-18 18:33:12 +01001321 bdrv_open_flags(bs, flags | BDRV_O_UNMAP) |
1322 BDRV_O_PROTOCOL, true, &local_err);
Max Reitz054963f2013-12-20 19:28:12 +01001323 if (ret < 0) {
Max Reitz9562f692014-02-15 18:03:21 +01001324 goto unlink_and_fail;
Kevin Wolff500a6d2012-11-12 17:35:27 +01001325 }
1326
1327 /* Find the right image format driver */
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001328 drvname = qdict_get_try_str(options, "driver");
1329 if (drvname) {
Kevin Wolf8f94a6e2013-10-10 11:45:55 +02001330 drv = bdrv_find_format(drvname);
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001331 qdict_del(options, "driver");
Kevin Wolf06d22aa2013-08-08 17:44:52 +02001332 if (!drv) {
1333 error_setg(errp, "Invalid driver: '%s'", drvname);
1334 ret = -EINVAL;
1335 goto unlink_and_fail;
1336 }
Kevin Wolf74fe54f2013-07-09 11:09:02 +02001337 }
1338
Kevin Wolff500a6d2012-11-12 17:35:27 +01001339 if (!drv) {
Max Reitz2a05cbe2013-12-20 19:28:10 +01001340 if (file) {
1341 ret = find_image_format(file, filename, &drv, &local_err);
1342 } else {
1343 error_setg(errp, "Must specify either driver or file");
1344 ret = -EINVAL;
1345 goto unlink_and_fail;
1346 }
Kevin Wolff500a6d2012-11-12 17:35:27 +01001347 }
1348
1349 if (!drv) {
1350 goto unlink_and_fail;
1351 }
1352
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001353 /* Open the image */
Max Reitz34b5d2c2013-09-05 14:45:29 +02001354 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001355 if (ret < 0) {
Christoph Hellwig69873072010-01-20 18:13:25 +01001356 goto unlink_and_fail;
1357 }
1358
Max Reitz2a05cbe2013-12-20 19:28:10 +01001359 if (file && (bs->file != file)) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001360 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001361 file = NULL;
1362 }
1363
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001364 /* If there is a backing file, use it */
Paolo Bonzini9156df12012-10-18 16:49:17 +02001365 if ((flags & BDRV_O_NO_BACKING) == 0) {
Kevin Wolf31ca6d02013-03-28 15:29:24 +01001366 QDict *backing_options;
1367
Benoît Canet5726d872013-09-25 13:30:01 +02001368 qdict_extract_subqdict(options, &backing_options, "backing.");
Max Reitz34b5d2c2013-09-05 14:45:29 +02001369 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001370 if (ret < 0) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001371 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001372 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001373 }
1374
Max Reitz5acd9d82014-02-18 18:33:11 +01001375done:
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001376 /* Check if any unknown options were used */
Max Reitz5acd9d82014-02-18 18:33:11 +01001377 if (options && (qdict_size(options) != 0)) {
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001378 const QDictEntry *entry = qdict_first(options);
Max Reitz5acd9d82014-02-18 18:33:11 +01001379 if (flags & BDRV_O_PROTOCOL) {
1380 error_setg(errp, "Block protocol '%s' doesn't support the option "
1381 "'%s'", drv->format_name, entry->key);
1382 } else {
1383 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1384 "support the option '%s'", drv->format_name,
1385 bs->device_name, entry->key);
1386 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001387
1388 ret = -EINVAL;
1389 goto close_and_fail;
1390 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001391
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001392 if (!bdrv_key_required(bs)) {
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02001393 bdrv_dev_change_media_cb(bs, true);
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001394 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1395 && !runstate_check(RUN_STATE_INMIGRATE)
1396 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1397 error_setg(errp,
1398 "Guest must be stopped for opening of encrypted image");
1399 ret = -EBUSY;
1400 goto close_and_fail;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001401 }
1402
Markus Armbrusterc3adb582014-03-14 09:22:48 +01001403 QDECREF(options);
Max Reitzf67503e2014-02-18 18:33:05 +01001404 *pbs = bs;
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001405 return 0;
1406
1407unlink_and_fail:
Kevin Wolff500a6d2012-11-12 17:35:27 +01001408 if (file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001409 bdrv_unref(file);
Kevin Wolff500a6d2012-11-12 17:35:27 +01001410 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001411 if (bs->is_temporary) {
1412 unlink(filename);
1413 }
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001414fail:
1415 QDECREF(bs->options);
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001416 QDECREF(options);
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001417 bs->options = NULL;
Max Reitzf67503e2014-02-18 18:33:05 +01001418 if (!*pbs) {
1419 /* If *pbs is NULL, a new BDS has been created in this function and
1420 needs to be freed now. Otherwise, it does not need to be closed,
1421 since it has not really been opened yet. */
1422 bdrv_unref(bs);
1423 }
Markus Armbruster84d18f02014-01-30 15:07:28 +01001424 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001425 error_propagate(errp, local_err);
1426 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001427 return ret;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001428
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001429close_and_fail:
Max Reitzf67503e2014-02-18 18:33:05 +01001430 /* See fail path, but now the BDS has to be always closed */
1431 if (*pbs) {
1432 bdrv_close(bs);
1433 } else {
1434 bdrv_unref(bs);
1435 }
Kevin Wolfb6ad4912013-03-15 10:35:04 +01001436 QDECREF(options);
Markus Armbruster84d18f02014-01-30 15:07:28 +01001437 if (local_err) {
Max Reitz34b5d2c2013-09-05 14:45:29 +02001438 error_propagate(errp, local_err);
1439 }
Kevin Wolfb6ce07a2010-04-12 16:37:13 +02001440 return ret;
1441}
1442
Jeff Codye971aa12012-09-20 15:13:19 -04001443typedef struct BlockReopenQueueEntry {
1444 bool prepared;
1445 BDRVReopenState state;
1446 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1447} BlockReopenQueueEntry;
1448
1449/*
1450 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1451 * reopen of multiple devices.
1452 *
1453 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1454 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1455 * be created and initialized. This newly created BlockReopenQueue should be
1456 * passed back in for subsequent calls that are intended to be of the same
1457 * atomic 'set'.
1458 *
1459 * bs is the BlockDriverState to add to the reopen queue.
1460 *
1461 * flags contains the open flags for the associated bs
1462 *
1463 * returns a pointer to bs_queue, which is either the newly allocated
1464 * bs_queue, or the existing bs_queue being used.
1465 *
1466 */
1467BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1468 BlockDriverState *bs, int flags)
1469{
1470 assert(bs != NULL);
1471
1472 BlockReopenQueueEntry *bs_entry;
1473 if (bs_queue == NULL) {
1474 bs_queue = g_new0(BlockReopenQueue, 1);
1475 QSIMPLEQ_INIT(bs_queue);
1476 }
1477
1478 if (bs->file) {
1479 bdrv_reopen_queue(bs_queue, bs->file, flags);
1480 }
1481
1482 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1483 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1484
1485 bs_entry->state.bs = bs;
1486 bs_entry->state.flags = flags;
1487
1488 return bs_queue;
1489}
1490
1491/*
1492 * Reopen multiple BlockDriverStates atomically & transactionally.
1493 *
1494 * The queue passed in (bs_queue) must have been built up previous
1495 * via bdrv_reopen_queue().
1496 *
1497 * Reopens all BDS specified in the queue, with the appropriate
1498 * flags. All devices are prepared for reopen, and failure of any
1499 * device will cause all device changes to be abandonded, and intermediate
1500 * data cleaned up.
1501 *
1502 * If all devices prepare successfully, then the changes are committed
1503 * to all devices.
1504 *
1505 */
1506int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1507{
1508 int ret = -1;
1509 BlockReopenQueueEntry *bs_entry, *next;
1510 Error *local_err = NULL;
1511
1512 assert(bs_queue != NULL);
1513
1514 bdrv_drain_all();
1515
1516 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1517 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1518 error_propagate(errp, local_err);
1519 goto cleanup;
1520 }
1521 bs_entry->prepared = true;
1522 }
1523
1524 /* If we reach this point, we have success and just need to apply the
1525 * changes
1526 */
1527 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1528 bdrv_reopen_commit(&bs_entry->state);
1529 }
1530
1531 ret = 0;
1532
1533cleanup:
1534 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1535 if (ret && bs_entry->prepared) {
1536 bdrv_reopen_abort(&bs_entry->state);
1537 }
1538 g_free(bs_entry);
1539 }
1540 g_free(bs_queue);
1541 return ret;
1542}
1543
1544
1545/* Reopen a single BlockDriverState with the specified flags. */
1546int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1547{
1548 int ret = -1;
1549 Error *local_err = NULL;
1550 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1551
1552 ret = bdrv_reopen_multiple(queue, &local_err);
1553 if (local_err != NULL) {
1554 error_propagate(errp, local_err);
1555 }
1556 return ret;
1557}
1558
1559
1560/*
1561 * Prepares a BlockDriverState for reopen. All changes are staged in the
1562 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1563 * the block driver layer .bdrv_reopen_prepare()
1564 *
1565 * bs is the BlockDriverState to reopen
1566 * flags are the new open flags
1567 * queue is the reopen queue
1568 *
1569 * Returns 0 on success, non-zero on error. On error errp will be set
1570 * as well.
1571 *
1572 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1573 * It is the responsibility of the caller to then call the abort() or
1574 * commit() for any other BDS that have been left in a prepare() state
1575 *
1576 */
1577int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1578 Error **errp)
1579{
1580 int ret = -1;
1581 Error *local_err = NULL;
1582 BlockDriver *drv;
1583
1584 assert(reopen_state != NULL);
1585 assert(reopen_state->bs->drv != NULL);
1586 drv = reopen_state->bs->drv;
1587
1588 /* if we are to stay read-only, do not allow permission change
1589 * to r/w */
1590 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1591 reopen_state->flags & BDRV_O_RDWR) {
1592 error_set(errp, QERR_DEVICE_IS_READ_ONLY,
1593 reopen_state->bs->device_name);
1594 goto error;
1595 }
1596
1597
1598 ret = bdrv_flush(reopen_state->bs);
1599 if (ret) {
1600 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1601 strerror(-ret));
1602 goto error;
1603 }
1604
1605 if (drv->bdrv_reopen_prepare) {
1606 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1607 if (ret) {
1608 if (local_err != NULL) {
1609 error_propagate(errp, local_err);
1610 } else {
Luiz Capitulinod8b68952013-06-10 11:29:27 -04001611 error_setg(errp, "failed while preparing to reopen image '%s'",
1612 reopen_state->bs->filename);
Jeff Codye971aa12012-09-20 15:13:19 -04001613 }
1614 goto error;
1615 }
1616 } else {
1617 /* It is currently mandatory to have a bdrv_reopen_prepare()
1618 * handler for each supported drv. */
1619 error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
1620 drv->format_name, reopen_state->bs->device_name,
1621 "reopening of file");
1622 ret = -1;
1623 goto error;
1624 }
1625
1626 ret = 0;
1627
1628error:
1629 return ret;
1630}
1631
1632/*
1633 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1634 * makes them final by swapping the staging BlockDriverState contents into
1635 * the active BlockDriverState contents.
1636 */
1637void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1638{
1639 BlockDriver *drv;
1640
1641 assert(reopen_state != NULL);
1642 drv = reopen_state->bs->drv;
1643 assert(drv != NULL);
1644
1645 /* If there are any driver level actions to take */
1646 if (drv->bdrv_reopen_commit) {
1647 drv->bdrv_reopen_commit(reopen_state);
1648 }
1649
1650 /* set BDS specific flags now */
1651 reopen_state->bs->open_flags = reopen_state->flags;
1652 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1653 BDRV_O_CACHE_WB);
1654 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
Kevin Wolf355ef4a2013-12-11 20:14:09 +01001655
1656 bdrv_refresh_limits(reopen_state->bs);
Jeff Codye971aa12012-09-20 15:13:19 -04001657}
1658
1659/*
1660 * Abort the reopen, and delete and free the staged changes in
1661 * reopen_state
1662 */
1663void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1664{
1665 BlockDriver *drv;
1666
1667 assert(reopen_state != NULL);
1668 drv = reopen_state->bs->drv;
1669 assert(drv != NULL);
1670
1671 if (drv->bdrv_reopen_abort) {
1672 drv->bdrv_reopen_abort(reopen_state);
1673 }
1674}
1675
1676
bellardfc01f7e2003-06-30 10:03:06 +00001677void bdrv_close(BlockDriverState *bs)
1678{
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001679 if (bs->job) {
1680 block_job_cancel_sync(bs->job);
1681 }
Stefan Hajnoczi58fda172013-07-02 15:36:25 +02001682 bdrv_drain_all(); /* complete I/O */
1683 bdrv_flush(bs);
1684 bdrv_drain_all(); /* in case flush left pending I/O */
Paolo Bonzinid7d512f2012-08-23 11:20:36 +02001685 notifier_list_notify(&bs->close_notifiers, bs);
Kevin Wolf7094f122012-04-11 11:06:37 +02001686
Paolo Bonzini3cbc0022012-10-19 11:36:48 +02001687 if (bs->drv) {
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001688 if (bs->backing_hd) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001689 bdrv_unref(bs->backing_hd);
Stefan Hajnoczi557df6a2010-04-17 10:49:06 +01001690 bs->backing_hd = NULL;
1691 }
bellardea2384d2004-08-01 21:59:26 +00001692 bs->drv->bdrv_close(bs);
Anthony Liguori7267c092011-08-20 22:09:37 -05001693 g_free(bs->opaque);
bellardea2384d2004-08-01 21:59:26 +00001694#ifdef _WIN32
1695 if (bs->is_temporary) {
1696 unlink(bs->filename);
1697 }
bellard67b915a2004-03-31 23:37:16 +00001698#endif
bellardea2384d2004-08-01 21:59:26 +00001699 bs->opaque = NULL;
1700 bs->drv = NULL;
Stefan Hajnoczi53fec9d2011-11-28 16:08:47 +00001701 bs->copy_on_read = 0;
Paolo Bonzinia275fa42012-05-08 16:51:43 +02001702 bs->backing_file[0] = '\0';
1703 bs->backing_format[0] = '\0';
Paolo Bonzini64058752012-05-08 16:51:49 +02001704 bs->total_sectors = 0;
1705 bs->encrypted = 0;
1706 bs->valid_key = 0;
1707 bs->sg = 0;
1708 bs->growable = 0;
Asias He0d51b4d2013-08-22 15:24:14 +08001709 bs->zero_beyond_eof = false;
Kevin Wolfde9c0ce2013-03-15 10:35:02 +01001710 QDECREF(bs->options);
1711 bs->options = NULL;
bellardb3380822004-03-14 21:38:54 +00001712
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001713 if (bs->file != NULL) {
Fam Zheng4f6fd342013-08-23 09:14:47 +08001714 bdrv_unref(bs->file);
Paolo Bonzini0ac93772012-05-08 16:51:44 +02001715 bs->file = NULL;
Kevin Wolf66f82ce2010-04-14 14:17:38 +02001716 }
bellardb3380822004-03-14 21:38:54 +00001717 }
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001718
Pavel Hrdina9ca11152012-08-09 12:44:48 +02001719 bdrv_dev_change_media_cb(bs, false);
1720
Zhi Yong Wu98f90db2011-11-08 13:00:14 +08001721 /*throttling disk I/O limits*/
1722 if (bs->io_limits_enabled) {
1723 bdrv_io_limits_disable(bs);
1724 }
bellardb3380822004-03-14 21:38:54 +00001725}
1726
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001727void bdrv_close_all(void)
1728{
1729 BlockDriverState *bs;
1730
Benoît Canetdc364f42014-01-23 21:31:32 +01001731 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
MORITA Kazutaka2bc93fe2010-05-28 11:44:57 +09001732 bdrv_close(bs);
1733 }
1734}
1735
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001736/* Check if any requests are in-flight (including throttled requests) */
1737static bool bdrv_requests_pending(BlockDriverState *bs)
1738{
1739 if (!QLIST_EMPTY(&bs->tracked_requests)) {
1740 return true;
1741 }
Benoît Canetcc0681c2013-09-02 14:14:39 +02001742 if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
1743 return true;
1744 }
1745 if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001746 return true;
1747 }
1748 if (bs->file && bdrv_requests_pending(bs->file)) {
1749 return true;
1750 }
1751 if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
1752 return true;
1753 }
1754 return false;
1755}
1756
1757static bool bdrv_requests_pending_all(void)
1758{
1759 BlockDriverState *bs;
Benoît Canetdc364f42014-01-23 21:31:32 +01001760 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001761 if (bdrv_requests_pending(bs)) {
1762 return true;
1763 }
1764 }
1765 return false;
1766}
1767
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001768/*
1769 * Wait for pending requests to complete across all BlockDriverStates
1770 *
1771 * This function does not flush data to disk, use bdrv_flush_all() for that
1772 * after calling this function.
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001773 *
1774 * Note that completion of an asynchronous I/O operation can trigger any
1775 * number of other I/O operations on other devices---for example a coroutine
1776 * can be arbitrarily complex and a constant flow of I/O can come until the
1777 * coroutine is complete. Because of this, it is not possible to have a
1778 * function to drain a single device's I/O queue.
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001779 */
1780void bdrv_drain_all(void)
1781{
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001782 /* Always run first iteration so any pending completion BHs run */
1783 bool busy = true;
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001784 BlockDriverState *bs;
1785
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001786 while (busy) {
Benoît Canetdc364f42014-01-23 21:31:32 +01001787 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi0b06ef32013-11-26 16:18:00 +01001788 bdrv_start_throttled_reqs(bs);
Zhi Yong Wu4c355d52012-04-12 14:00:57 +02001789 }
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001790
Stefan Hajnoczi88266f52013-04-11 15:41:13 +02001791 busy = bdrv_requests_pending_all();
1792 busy |= aio_poll(qemu_get_aio_context(), busy);
Stefan Hajnoczi922453b2011-11-30 12:23:43 +00001793 }
1794}
1795
Benoît Canetdc364f42014-01-23 21:31:32 +01001796/* make a BlockDriverState anonymous by removing from bdrv_state and
1797 * graph_bdrv_state list.
Ryan Harperd22b2f42011-03-29 20:51:47 -05001798 Also, NULL terminate the device_name to prevent double remove */
1799void bdrv_make_anon(BlockDriverState *bs)
1800{
1801 if (bs->device_name[0] != '\0') {
Benoît Canetdc364f42014-01-23 21:31:32 +01001802 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
Ryan Harperd22b2f42011-03-29 20:51:47 -05001803 }
1804 bs->device_name[0] = '\0';
Benoît Canetdc364f42014-01-23 21:31:32 +01001805 if (bs->node_name[0] != '\0') {
1806 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1807 }
1808 bs->node_name[0] = '\0';
Ryan Harperd22b2f42011-03-29 20:51:47 -05001809}
1810
Paolo Bonzinie023b2e2012-05-08 16:51:41 +02001811static void bdrv_rebind(BlockDriverState *bs)
1812{
1813 if (bs->drv && bs->drv->bdrv_rebind) {
1814 bs->drv->bdrv_rebind(bs);
1815 }
1816}
1817
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001818static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1819 BlockDriverState *bs_src)
1820{
1821 /* move some fields that need to stay attached to the device */
1822 bs_dest->open_flags = bs_src->open_flags;
1823
1824 /* dev info */
1825 bs_dest->dev_ops = bs_src->dev_ops;
1826 bs_dest->dev_opaque = bs_src->dev_opaque;
1827 bs_dest->dev = bs_src->dev;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001828 bs_dest->guest_block_size = bs_src->guest_block_size;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001829 bs_dest->copy_on_read = bs_src->copy_on_read;
1830
1831 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1832
Benoît Canetcc0681c2013-09-02 14:14:39 +02001833 /* i/o throttled req */
1834 memcpy(&bs_dest->throttle_state,
1835 &bs_src->throttle_state,
1836 sizeof(ThrottleState));
1837 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1838 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001839 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1840
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001841 /* r/w error */
1842 bs_dest->on_read_error = bs_src->on_read_error;
1843 bs_dest->on_write_error = bs_src->on_write_error;
1844
1845 /* i/o status */
1846 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
1847 bs_dest->iostatus = bs_src->iostatus;
1848
1849 /* dirty bitmap */
Fam Zhenge4654d22013-11-13 18:29:43 +08001850 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001851
Fam Zheng9fcb0252013-08-23 09:14:46 +08001852 /* reference count */
1853 bs_dest->refcnt = bs_src->refcnt;
1854
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001855 /* job */
1856 bs_dest->in_use = bs_src->in_use;
1857 bs_dest->job = bs_src->job;
1858
1859 /* keep the same entry in bdrv_states */
1860 pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
1861 bs_src->device_name);
Benoît Canetdc364f42014-01-23 21:31:32 +01001862 bs_dest->device_list = bs_src->device_list;
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001863}
1864
1865/*
1866 * Swap bs contents for two image chains while they are live,
1867 * while keeping required fields on the BlockDriverState that is
1868 * actually attached to a device.
1869 *
1870 * This will modify the BlockDriverState fields, and swap contents
1871 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1872 *
1873 * bs_new is required to be anonymous.
1874 *
1875 * This function does not create any image files.
1876 */
1877void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
1878{
1879 BlockDriverState tmp;
1880
Benoît Canet90ce8a02014-03-05 23:48:29 +01001881 /* The code needs to swap the node_name but simply swapping node_list won't
1882 * work so first remove the nodes from the graph list, do the swap then
1883 * insert them back if needed.
1884 */
1885 if (bs_new->node_name[0] != '\0') {
1886 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
1887 }
1888 if (bs_old->node_name[0] != '\0') {
1889 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
1890 }
1891
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001892 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
1893 assert(bs_new->device_name[0] == '\0');
Fam Zhenge4654d22013-11-13 18:29:43 +08001894 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001895 assert(bs_new->job == NULL);
1896 assert(bs_new->dev == NULL);
1897 assert(bs_new->in_use == 0);
1898 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02001899 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001900
1901 tmp = *bs_new;
1902 *bs_new = *bs_old;
1903 *bs_old = tmp;
1904
1905 /* there are some fields that should not be swapped, move them back */
1906 bdrv_move_feature_fields(&tmp, bs_old);
1907 bdrv_move_feature_fields(bs_old, bs_new);
1908 bdrv_move_feature_fields(bs_new, &tmp);
1909
1910 /* bs_new shouldn't be in bdrv_states even after the swap! */
1911 assert(bs_new->device_name[0] == '\0');
1912
1913 /* Check a few fields that should remain attached to the device */
1914 assert(bs_new->dev == NULL);
1915 assert(bs_new->job == NULL);
1916 assert(bs_new->in_use == 0);
1917 assert(bs_new->io_limits_enabled == false);
Benoît Canetcc0681c2013-09-02 14:14:39 +02001918 assert(!throttle_have_timer(&bs_new->throttle_state));
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001919
Benoît Canet90ce8a02014-03-05 23:48:29 +01001920 /* insert the nodes back into the graph node list if needed */
1921 if (bs_new->node_name[0] != '\0') {
1922 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
1923 }
1924 if (bs_old->node_name[0] != '\0') {
1925 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
1926 }
1927
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001928 bdrv_rebind(bs_new);
1929 bdrv_rebind(bs_old);
1930}
1931
Jeff Cody8802d1f2012-02-28 15:54:06 -05001932/*
1933 * Add new bs contents at the top of an image chain while the chain is
1934 * live, while keeping required fields on the top layer.
1935 *
1936 * This will modify the BlockDriverState fields, and swap contents
1937 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1938 *
Jeff Codyf6801b82012-03-27 16:30:19 -04001939 * bs_new is required to be anonymous.
1940 *
Jeff Cody8802d1f2012-02-28 15:54:06 -05001941 * This function does not create any image files.
1942 */
1943void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
1944{
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001945 bdrv_swap(bs_new, bs_top);
Jeff Cody8802d1f2012-02-28 15:54:06 -05001946
1947 /* The contents of 'tmp' will become bs_top, as we are
1948 * swapping bs_new and bs_top contents. */
Paolo Bonzini4ddc07c2012-06-14 16:55:02 +02001949 bs_top->backing_hd = bs_new;
1950 bs_top->open_flags &= ~BDRV_O_NO_BACKING;
1951 pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
1952 bs_new->filename);
1953 pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
1954 bs_new->drv ? bs_new->drv->format_name : "");
Jeff Cody8802d1f2012-02-28 15:54:06 -05001955}
1956
Fam Zheng4f6fd342013-08-23 09:14:47 +08001957static void bdrv_delete(BlockDriverState *bs)
bellardb3380822004-03-14 21:38:54 +00001958{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001959 assert(!bs->dev);
Paolo Bonzini3e914652012-03-30 13:17:11 +02001960 assert(!bs->job);
1961 assert(!bs->in_use);
Fam Zheng4f6fd342013-08-23 09:14:47 +08001962 assert(!bs->refcnt);
Fam Zhenge4654d22013-11-13 18:29:43 +08001963 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Markus Armbruster18846de2010-06-29 16:58:30 +02001964
Stefan Hajnoczie1b5c522013-06-27 15:32:26 +02001965 bdrv_close(bs);
1966
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01001967 /* remove from list, if necessary */
Ryan Harperd22b2f42011-03-29 20:51:47 -05001968 bdrv_make_anon(bs);
aurel3234c6f052008-04-08 19:51:21 +00001969
Anthony Liguori7267c092011-08-20 22:09:37 -05001970 g_free(bs);
bellardfc01f7e2003-06-30 10:03:06 +00001971}
1972
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001973int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1974/* TODO change to DeviceState *dev when all users are qdevified */
Markus Armbruster18846de2010-06-29 16:58:30 +02001975{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001976 if (bs->dev) {
Markus Armbruster18846de2010-06-29 16:58:30 +02001977 return -EBUSY;
1978 }
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001979 bs->dev = dev;
Luiz Capitulino28a72822011-09-26 17:43:50 -03001980 bdrv_iostatus_reset(bs);
Markus Armbruster18846de2010-06-29 16:58:30 +02001981 return 0;
1982}
1983
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001984/* TODO qdevified devices don't use this, remove when devices are qdevified */
1985void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
Markus Armbruster18846de2010-06-29 16:58:30 +02001986{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02001987 if (bdrv_attach_dev(bs, dev) < 0) {
1988 abort();
1989 }
1990}
1991
1992void bdrv_detach_dev(BlockDriverState *bs, void *dev)
1993/* TODO change to DeviceState *dev when all users are qdevified */
1994{
1995 assert(bs->dev == dev);
1996 bs->dev = NULL;
Markus Armbruster0e49de52011-08-03 15:07:41 +02001997 bs->dev_ops = NULL;
1998 bs->dev_opaque = NULL;
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01001999 bs->guest_block_size = 512;
Markus Armbruster18846de2010-06-29 16:58:30 +02002000}
2001
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002002/* TODO change to return DeviceState * when all users are qdevified */
2003void *bdrv_get_attached_dev(BlockDriverState *bs)
Markus Armbruster18846de2010-06-29 16:58:30 +02002004{
Markus Armbrusterfa879d62011-08-03 15:07:40 +02002005 return bs->dev;
Markus Armbruster18846de2010-06-29 16:58:30 +02002006}
2007
Markus Armbruster0e49de52011-08-03 15:07:41 +02002008void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2009 void *opaque)
2010{
2011 bs->dev_ops = ops;
2012 bs->dev_opaque = opaque;
2013}
2014
Paolo Bonzini32c81a42012-09-28 17:22:58 +02002015void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
2016 enum MonitorEvent ev,
2017 BlockErrorAction action, bool is_read)
Luiz Capitulino329c0a42012-01-25 16:59:43 -02002018{
2019 QObject *data;
2020 const char *action_str;
2021
2022 switch (action) {
2023 case BDRV_ACTION_REPORT:
2024 action_str = "report";
2025 break;
2026 case BDRV_ACTION_IGNORE:
2027 action_str = "ignore";
2028 break;
2029 case BDRV_ACTION_STOP:
2030 action_str = "stop";
2031 break;
2032 default:
2033 abort();
2034 }
2035
2036 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
2037 bdrv->device_name,
2038 action_str,
2039 is_read ? "read" : "write");
Paolo Bonzini32c81a42012-09-28 17:22:58 +02002040 monitor_protocol_event(ev, data);
Luiz Capitulino329c0a42012-01-25 16:59:43 -02002041
2042 qobject_decref(data);
2043}
2044
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002045static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
2046{
2047 QObject *data;
2048
2049 data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
2050 bdrv_get_device_name(bs), ejected);
2051 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
2052
2053 qobject_decref(data);
2054}
2055
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002056static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
Markus Armbruster0e49de52011-08-03 15:07:41 +02002057{
Markus Armbruster145feb12011-08-03 15:07:42 +02002058 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002059 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02002060 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02002061 if (tray_was_closed) {
2062 /* tray open */
2063 bdrv_emit_qmp_eject_event(bs, true);
2064 }
2065 if (load) {
2066 /* tray close */
2067 bdrv_emit_qmp_eject_event(bs, false);
2068 }
Markus Armbruster145feb12011-08-03 15:07:42 +02002069 }
2070}
2071
Markus Armbruster2c6942f2011-09-06 18:58:51 +02002072bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2073{
2074 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2075}
2076
Paolo Bonzini025ccaa2011-11-07 17:50:13 +01002077void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2078{
2079 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2080 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2081 }
2082}
2083
Markus Armbrustere4def802011-09-06 18:58:53 +02002084bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2085{
2086 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2087 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2088 }
2089 return false;
2090}
2091
Markus Armbruster145feb12011-08-03 15:07:42 +02002092static void bdrv_dev_resize_cb(BlockDriverState *bs)
2093{
2094 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2095 bs->dev_ops->resize_cb(bs->dev_opaque);
Markus Armbruster0e49de52011-08-03 15:07:41 +02002096 }
2097}
2098
Markus Armbrusterf1076392011-09-06 18:58:46 +02002099bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2100{
2101 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2102 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2103 }
2104 return false;
2105}
2106
aliguorie97fc192009-04-21 23:11:50 +00002107/*
2108 * Run consistency checks on an image
2109 *
Kevin Wolfe076f332010-06-29 11:43:13 +02002110 * Returns 0 if the check could be completed (it doesn't mean that the image is
Stefan Weila1c72732011-04-28 17:20:38 +02002111 * free of errors) or -errno when an internal error occurred. The results of the
Kevin Wolfe076f332010-06-29 11:43:13 +02002112 * check are stored in res.
aliguorie97fc192009-04-21 23:11:50 +00002113 */
Kevin Wolf4534ff52012-05-11 16:07:02 +02002114int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
aliguorie97fc192009-04-21 23:11:50 +00002115{
2116 if (bs->drv->bdrv_check == NULL) {
2117 return -ENOTSUP;
2118 }
2119
Kevin Wolfe076f332010-06-29 11:43:13 +02002120 memset(res, 0, sizeof(*res));
Kevin Wolf4534ff52012-05-11 16:07:02 +02002121 return bs->drv->bdrv_check(bs, res, fix);
aliguorie97fc192009-04-21 23:11:50 +00002122}
2123
Kevin Wolf8a426612010-07-16 17:17:01 +02002124#define COMMIT_BUF_SECTORS 2048
2125
bellard33e39632003-07-06 17:15:21 +00002126/* commit COW file into the raw image */
2127int bdrv_commit(BlockDriverState *bs)
2128{
bellard19cb3732006-08-19 11:45:59 +00002129 BlockDriver *drv = bs->drv;
Jeff Cody72706ea2014-01-24 09:02:35 -05002130 int64_t sector, total_sectors, length, backing_length;
Kevin Wolf8a426612010-07-16 17:17:01 +02002131 int n, ro, open_flags;
Jeff Cody0bce5972012-09-20 15:13:34 -04002132 int ret = 0;
Jeff Cody72706ea2014-01-24 09:02:35 -05002133 uint8_t *buf = NULL;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002134 char filename[PATH_MAX];
bellard33e39632003-07-06 17:15:21 +00002135
bellard19cb3732006-08-19 11:45:59 +00002136 if (!drv)
2137 return -ENOMEDIUM;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002138
2139 if (!bs->backing_hd) {
2140 return -ENOTSUP;
bellard33e39632003-07-06 17:15:21 +00002141 }
2142
Stefan Hajnoczi2d3735d2012-01-18 14:40:41 +00002143 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
2144 return -EBUSY;
2145 }
2146
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002147 ro = bs->backing_hd->read_only;
Jim Meyeringc2cba3d2012-10-04 13:09:46 +02002148 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2149 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002150 open_flags = bs->backing_hd->open_flags;
2151
2152 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002153 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2154 return -EACCES;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002155 }
bellard33e39632003-07-06 17:15:21 +00002156 }
bellardea2384d2004-08-01 21:59:26 +00002157
Jeff Cody72706ea2014-01-24 09:02:35 -05002158 length = bdrv_getlength(bs);
2159 if (length < 0) {
2160 ret = length;
2161 goto ro_cleanup;
2162 }
2163
2164 backing_length = bdrv_getlength(bs->backing_hd);
2165 if (backing_length < 0) {
2166 ret = backing_length;
2167 goto ro_cleanup;
2168 }
2169
2170 /* If our top snapshot is larger than the backing file image,
2171 * grow the backing file image if possible. If not possible,
2172 * we must return an error */
2173 if (length > backing_length) {
2174 ret = bdrv_truncate(bs->backing_hd, length);
2175 if (ret < 0) {
2176 goto ro_cleanup;
2177 }
2178 }
2179
2180 total_sectors = length >> BDRV_SECTOR_BITS;
Anthony Liguori7267c092011-08-20 22:09:37 -05002181 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
bellardea2384d2004-08-01 21:59:26 +00002182
Kevin Wolf8a426612010-07-16 17:17:01 +02002183 for (sector = 0; sector < total_sectors; sector += n) {
Paolo Bonzinid6636402013-09-04 19:00:25 +02002184 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2185 if (ret < 0) {
2186 goto ro_cleanup;
2187 }
2188 if (ret) {
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002189 ret = bdrv_read(bs, sector, buf, n);
2190 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002191 goto ro_cleanup;
2192 }
2193
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002194 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2195 if (ret < 0) {
Kevin Wolf8a426612010-07-16 17:17:01 +02002196 goto ro_cleanup;
2197 }
bellardea2384d2004-08-01 21:59:26 +00002198 }
2199 }
bellard95389c82005-12-18 18:28:15 +00002200
Christoph Hellwig1d449522010-01-17 12:32:30 +01002201 if (drv->bdrv_make_empty) {
2202 ret = drv->bdrv_make_empty(bs);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002203 if (ret < 0) {
2204 goto ro_cleanup;
2205 }
Christoph Hellwig1d449522010-01-17 12:32:30 +01002206 bdrv_flush(bs);
2207 }
bellard95389c82005-12-18 18:28:15 +00002208
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002209 /*
2210 * Make sure all data we wrote to the backing device is actually
2211 * stable on disk.
2212 */
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002213 if (bs->backing_hd) {
Christoph Hellwig3f5075a2010-01-12 13:49:23 +01002214 bdrv_flush(bs->backing_hd);
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002215 }
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002216
Kevin Wolfdabfa6c2014-01-24 14:00:43 +01002217 ret = 0;
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002218ro_cleanup:
Anthony Liguori7267c092011-08-20 22:09:37 -05002219 g_free(buf);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002220
2221 if (ro) {
Jeff Cody0bce5972012-09-20 15:13:34 -04002222 /* ignoring error return here */
2223 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
Naphtali Sprei4dca4b62010-02-14 13:39:18 +02002224 }
2225
Christoph Hellwig1d449522010-01-17 12:32:30 +01002226 return ret;
bellard33e39632003-07-06 17:15:21 +00002227}
2228
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002229int bdrv_commit_all(void)
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002230{
2231 BlockDriverState *bs;
2232
Benoît Canetdc364f42014-01-23 21:31:32 +01002233 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Jeff Cody272d2d82013-02-26 09:55:48 -05002234 if (bs->drv && bs->backing_hd) {
2235 int ret = bdrv_commit(bs);
2236 if (ret < 0) {
2237 return ret;
2238 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002239 }
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002240 }
Stefan Hajnoczie8877492012-03-05 18:10:11 +00002241 return 0;
Markus Armbruster6ab4b5a2010-06-02 18:55:18 +02002242}
2243
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002244/**
2245 * Remove an active request from the tracked requests list
2246 *
2247 * This function should be called when a tracked request is completing.
2248 */
2249static void tracked_request_end(BdrvTrackedRequest *req)
2250{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002251 if (req->serialising) {
2252 req->bs->serialising_in_flight--;
2253 }
2254
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002255 QLIST_REMOVE(req, list);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002256 qemu_co_queue_restart_all(&req->wait_queue);
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002257}
2258
2259/**
2260 * Add an active request to the tracked requests list
2261 */
2262static void tracked_request_begin(BdrvTrackedRequest *req,
2263 BlockDriverState *bs,
Kevin Wolf793ed472013-12-03 15:31:25 +01002264 int64_t offset,
2265 unsigned int bytes, bool is_write)
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002266{
2267 *req = (BdrvTrackedRequest){
2268 .bs = bs,
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002269 .offset = offset,
2270 .bytes = bytes,
2271 .is_write = is_write,
2272 .co = qemu_coroutine_self(),
2273 .serialising = false,
Kevin Wolf73271452013-12-04 17:08:50 +01002274 .overlap_offset = offset,
2275 .overlap_bytes = bytes,
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002276 };
2277
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002278 qemu_co_queue_init(&req->wait_queue);
2279
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002280 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2281}
2282
Kevin Wolfe96126f2014-02-08 10:42:18 +01002283static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002284{
Kevin Wolf73271452013-12-04 17:08:50 +01002285 int64_t overlap_offset = req->offset & ~(align - 1);
Kevin Wolfe96126f2014-02-08 10:42:18 +01002286 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2287 - overlap_offset;
Kevin Wolf73271452013-12-04 17:08:50 +01002288
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002289 if (!req->serialising) {
2290 req->bs->serialising_in_flight++;
2291 req->serialising = true;
2292 }
Kevin Wolf73271452013-12-04 17:08:50 +01002293
2294 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2295 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002296}
2297
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002298/**
2299 * Round a region to cluster boundaries
2300 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002301void bdrv_round_to_clusters(BlockDriverState *bs,
2302 int64_t sector_num, int nb_sectors,
2303 int64_t *cluster_sector_num,
2304 int *cluster_nb_sectors)
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002305{
2306 BlockDriverInfo bdi;
2307
2308 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2309 *cluster_sector_num = sector_num;
2310 *cluster_nb_sectors = nb_sectors;
2311 } else {
2312 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2313 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2314 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2315 nb_sectors, c);
2316 }
2317}
2318
Kevin Wolf73271452013-12-04 17:08:50 +01002319static int bdrv_get_cluster_size(BlockDriverState *bs)
Kevin Wolf793ed472013-12-03 15:31:25 +01002320{
2321 BlockDriverInfo bdi;
Kevin Wolf73271452013-12-04 17:08:50 +01002322 int ret;
Kevin Wolf793ed472013-12-03 15:31:25 +01002323
Kevin Wolf73271452013-12-04 17:08:50 +01002324 ret = bdrv_get_info(bs, &bdi);
2325 if (ret < 0 || bdi.cluster_size == 0) {
2326 return bs->request_alignment;
Kevin Wolf793ed472013-12-03 15:31:25 +01002327 } else {
Kevin Wolf73271452013-12-04 17:08:50 +01002328 return bdi.cluster_size;
Kevin Wolf793ed472013-12-03 15:31:25 +01002329 }
2330}
2331
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002332static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Kevin Wolf793ed472013-12-03 15:31:25 +01002333 int64_t offset, unsigned int bytes)
2334{
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002335 /* aaaa bbbb */
Kevin Wolf73271452013-12-04 17:08:50 +01002336 if (offset >= req->overlap_offset + req->overlap_bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002337 return false;
2338 }
2339 /* bbbb aaaa */
Kevin Wolf73271452013-12-04 17:08:50 +01002340 if (req->overlap_offset >= offset + bytes) {
Stefan Hajnoczid83947a2011-11-23 11:47:56 +00002341 return false;
2342 }
2343 return true;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002344}
2345
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002346static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002347{
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002348 BlockDriverState *bs = self->bs;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002349 BdrvTrackedRequest *req;
2350 bool retry;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002351 bool waited = false;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002352
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002353 if (!bs->serialising_in_flight) {
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002354 return false;
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002355 }
2356
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002357 do {
2358 retry = false;
2359 QLIST_FOREACH(req, &bs->tracked_requests, list) {
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002360 if (req == self || (!req->serialising && !self->serialising)) {
Kevin Wolf65afd212013-12-03 14:55:55 +01002361 continue;
2362 }
Kevin Wolf73271452013-12-04 17:08:50 +01002363 if (tracked_request_overlaps(req, self->overlap_offset,
2364 self->overlap_bytes))
2365 {
Stefan Hajnoczi5f8b6492011-11-30 12:23:42 +00002366 /* Hitting this means there was a reentrant request, for
2367 * example, a block driver issuing nested requests. This must
2368 * never happen since it means deadlock.
2369 */
2370 assert(qemu_coroutine_self() != req->co);
2371
Kevin Wolf64604402013-12-13 13:04:35 +01002372 /* If the request is already (indirectly) waiting for us, or
2373 * will wait for us as soon as it wakes up, then just go on
2374 * (instead of producing a deadlock in the former case). */
2375 if (!req->waiting_for) {
2376 self->waiting_for = req;
2377 qemu_co_queue_wait(&req->wait_queue);
2378 self->waiting_for = NULL;
2379 retry = true;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002380 waited = true;
Kevin Wolf64604402013-12-13 13:04:35 +01002381 break;
2382 }
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002383 }
2384 }
2385 } while (retry);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01002386
2387 return waited;
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002388}
2389
Kevin Wolf756e6732010-01-12 12:55:17 +01002390/*
2391 * Return values:
2392 * 0 - success
2393 * -EINVAL - backing format specified, but no file
2394 * -ENOSPC - can't update the backing file because no space is left in the
2395 * image file header
2396 * -ENOTSUP - format driver doesn't support changing the backing file
2397 */
2398int bdrv_change_backing_file(BlockDriverState *bs,
2399 const char *backing_file, const char *backing_fmt)
2400{
2401 BlockDriver *drv = bs->drv;
Paolo Bonzini469ef352012-04-12 14:01:02 +02002402 int ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002403
Paolo Bonzini5f377792012-04-12 14:01:01 +02002404 /* Backing file format doesn't make sense without a backing file */
2405 if (backing_fmt && !backing_file) {
2406 return -EINVAL;
2407 }
2408
Kevin Wolf756e6732010-01-12 12:55:17 +01002409 if (drv->bdrv_change_backing_file != NULL) {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002410 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
Kevin Wolf756e6732010-01-12 12:55:17 +01002411 } else {
Paolo Bonzini469ef352012-04-12 14:01:02 +02002412 ret = -ENOTSUP;
Kevin Wolf756e6732010-01-12 12:55:17 +01002413 }
Paolo Bonzini469ef352012-04-12 14:01:02 +02002414
2415 if (ret == 0) {
2416 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2417 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2418 }
2419 return ret;
Kevin Wolf756e6732010-01-12 12:55:17 +01002420}
2421
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002422/*
2423 * Finds the image layer in the chain that has 'bs' as its backing file.
2424 *
2425 * active is the current topmost image.
2426 *
2427 * Returns NULL if bs is not found in active's image chain,
2428 * or if active == bs.
2429 */
2430BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2431 BlockDriverState *bs)
2432{
2433 BlockDriverState *overlay = NULL;
2434 BlockDriverState *intermediate;
2435
2436 assert(active != NULL);
2437 assert(bs != NULL);
2438
2439 /* if bs is the same as active, then by definition it has no overlay
2440 */
2441 if (active == bs) {
2442 return NULL;
2443 }
2444
2445 intermediate = active;
2446 while (intermediate->backing_hd) {
2447 if (intermediate->backing_hd == bs) {
2448 overlay = intermediate;
2449 break;
2450 }
2451 intermediate = intermediate->backing_hd;
2452 }
2453
2454 return overlay;
2455}
2456
2457typedef struct BlkIntermediateStates {
2458 BlockDriverState *bs;
2459 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2460} BlkIntermediateStates;
2461
2462
2463/*
2464 * Drops images above 'base' up to and including 'top', and sets the image
2465 * above 'top' to have base as its backing file.
2466 *
2467 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2468 * information in 'bs' can be properly updated.
2469 *
2470 * E.g., this will convert the following chain:
2471 * bottom <- base <- intermediate <- top <- active
2472 *
2473 * to
2474 *
2475 * bottom <- base <- active
2476 *
2477 * It is allowed for bottom==base, in which case it converts:
2478 *
2479 * base <- intermediate <- top <- active
2480 *
2481 * to
2482 *
2483 * base <- active
2484 *
2485 * Error conditions:
2486 * if active == top, that is considered an error
2487 *
2488 */
2489int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2490 BlockDriverState *base)
2491{
2492 BlockDriverState *intermediate;
2493 BlockDriverState *base_bs = NULL;
2494 BlockDriverState *new_top_bs = NULL;
2495 BlkIntermediateStates *intermediate_state, *next;
2496 int ret = -EIO;
2497
2498 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2499 QSIMPLEQ_INIT(&states_to_delete);
2500
2501 if (!top->drv || !base->drv) {
2502 goto exit;
2503 }
2504
2505 new_top_bs = bdrv_find_overlay(active, top);
2506
2507 if (new_top_bs == NULL) {
2508 /* we could not find the image above 'top', this is an error */
2509 goto exit;
2510 }
2511
2512 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2513 * to do, no intermediate images */
2514 if (new_top_bs->backing_hd == base) {
2515 ret = 0;
2516 goto exit;
2517 }
2518
2519 intermediate = top;
2520
2521 /* now we will go down through the list, and add each BDS we find
2522 * into our deletion queue, until we hit the 'base'
2523 */
2524 while (intermediate) {
2525 intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
2526 intermediate_state->bs = intermediate;
2527 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2528
2529 if (intermediate->backing_hd == base) {
2530 base_bs = intermediate->backing_hd;
2531 break;
2532 }
2533 intermediate = intermediate->backing_hd;
2534 }
2535 if (base_bs == NULL) {
2536 /* something went wrong, we did not end at the base. safely
2537 * unravel everything, and exit with error */
2538 goto exit;
2539 }
2540
2541 /* success - we can delete the intermediate states, and link top->base */
2542 ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
2543 base_bs->drv ? base_bs->drv->format_name : "");
2544 if (ret) {
2545 goto exit;
2546 }
2547 new_top_bs->backing_hd = base_bs;
2548
Kevin Wolf355ef4a2013-12-11 20:14:09 +01002549 bdrv_refresh_limits(new_top_bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002550
2551 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2552 /* so that bdrv_close() does not recursively close the chain */
2553 intermediate_state->bs->backing_hd = NULL;
Fam Zheng4f6fd342013-08-23 09:14:47 +08002554 bdrv_unref(intermediate_state->bs);
Jeff Cody6ebdcee2012-09-27 13:29:12 -04002555 }
2556 ret = 0;
2557
2558exit:
2559 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2560 g_free(intermediate_state);
2561 }
2562 return ret;
2563}
2564
2565
aliguori71d07702009-03-03 17:37:16 +00002566static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2567 size_t size)
2568{
2569 int64_t len;
2570
2571 if (!bdrv_is_inserted(bs))
2572 return -ENOMEDIUM;
2573
2574 if (bs->growable)
2575 return 0;
2576
2577 len = bdrv_getlength(bs);
2578
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02002579 if (offset < 0)
2580 return -EIO;
2581
2582 if ((offset > len) || (len - offset < size))
aliguori71d07702009-03-03 17:37:16 +00002583 return -EIO;
2584
2585 return 0;
2586}
2587
2588static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2589 int nb_sectors)
2590{
Kevin Wolf8f4754e2014-03-26 13:06:02 +01002591 if (nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2592 return -EIO;
2593 }
2594
Jes Sorenseneb5a3162010-05-27 16:20:31 +02002595 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2596 nb_sectors * BDRV_SECTOR_SIZE);
aliguori71d07702009-03-03 17:37:16 +00002597}
2598
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002599typedef struct RwCo {
2600 BlockDriverState *bs;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002601 int64_t offset;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002602 QEMUIOVector *qiov;
2603 bool is_write;
2604 int ret;
Peter Lieven4105eaa2013-07-11 14:16:22 +02002605 BdrvRequestFlags flags;
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002606} RwCo;
2607
2608static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2609{
2610 RwCo *rwco = opaque;
2611
2612 if (!rwco->is_write) {
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002613 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2614 rwco->qiov->size, rwco->qiov,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002615 rwco->flags);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002616 } else {
2617 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2618 rwco->qiov->size, rwco->qiov,
2619 rwco->flags);
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002620 }
2621}
2622
2623/*
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002624 * Process a vectored synchronous request using coroutines
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002625 */
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002626static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2627 QEMUIOVector *qiov, bool is_write,
2628 BdrvRequestFlags flags)
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002629{
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002630 Coroutine *co;
2631 RwCo rwco = {
2632 .bs = bs,
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002633 .offset = offset,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002634 .qiov = qiov,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002635 .is_write = is_write,
2636 .ret = NOT_DONE,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002637 .flags = flags,
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002638 };
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002639
Zhi Yong Wu498e3862012-04-02 18:59:34 +08002640 /**
2641 * In sync call context, when the vcpu is blocked, this throttling timer
2642 * will not fire; so the I/O throttling function has to be disabled here
2643 * if it has been enabled.
2644 */
2645 if (bs->io_limits_enabled) {
2646 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2647 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2648 bdrv_io_limits_disable(bs);
2649 }
2650
Stefan Hajnoczi1c9805a2011-10-13 13:08:22 +01002651 if (qemu_in_coroutine()) {
2652 /* Fast-path if already in coroutine context */
2653 bdrv_rw_co_entry(&rwco);
2654 } else {
2655 co = qemu_coroutine_create(bdrv_rw_co_entry);
2656 qemu_coroutine_enter(co, &rwco);
2657 while (rwco.ret == NOT_DONE) {
2658 qemu_aio_wait();
2659 }
2660 }
2661 return rwco.ret;
2662}
2663
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002664/*
2665 * Process a synchronous request using coroutines
2666 */
2667static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
Peter Lieven4105eaa2013-07-11 14:16:22 +02002668 int nb_sectors, bool is_write, BdrvRequestFlags flags)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002669{
2670 QEMUIOVector qiov;
2671 struct iovec iov = {
2672 .iov_base = (void *)buf,
2673 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2674 };
2675
2676 qemu_iovec_init_external(&qiov, &iov, 1);
Kevin Wolf775aa8b2013-12-05 12:09:38 +01002677 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2678 &qiov, is_write, flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002679}
2680
bellard19cb3732006-08-19 11:45:59 +00002681/* return < 0 if error. See bdrv_write() for the return codes */
ths5fafdf22007-09-16 21:08:06 +00002682int bdrv_read(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002683 uint8_t *buf, int nb_sectors)
2684{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002685 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
bellardfc01f7e2003-06-30 10:03:06 +00002686}
2687
Markus Armbruster07d27a42012-06-29 17:34:29 +02002688/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2689int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2690 uint8_t *buf, int nb_sectors)
2691{
2692 bool enabled;
2693 int ret;
2694
2695 enabled = bs->io_limits_enabled;
2696 bs->io_limits_enabled = false;
Peter Lieven4e7395e2013-07-18 10:37:32 +02002697 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
Markus Armbruster07d27a42012-06-29 17:34:29 +02002698 bs->io_limits_enabled = enabled;
2699 return ret;
2700}
2701
ths5fafdf22007-09-16 21:08:06 +00002702/* Return < 0 if error. Important errors are:
bellard19cb3732006-08-19 11:45:59 +00002703 -EIO generic I/O error (may happen for all errors)
2704 -ENOMEDIUM No media inserted.
2705 -EINVAL Invalid sector number or nb_sectors
2706 -EACCES Trying to write a read-only device
2707*/
ths5fafdf22007-09-16 21:08:06 +00002708int bdrv_write(BlockDriverState *bs, int64_t sector_num,
bellardfc01f7e2003-06-30 10:03:06 +00002709 const uint8_t *buf, int nb_sectors)
2710{
Peter Lieven4105eaa2013-07-11 14:16:22 +02002711 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
bellard83f64092006-08-01 16:21:11 +00002712}
2713
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002714int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2715 int nb_sectors, BdrvRequestFlags flags)
Peter Lieven4105eaa2013-07-11 14:16:22 +02002716{
2717 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002718 BDRV_REQ_ZERO_WRITE | flags);
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002719}
2720
Peter Lievend75cbb52013-10-24 12:07:03 +02002721/*
2722 * Completely zero out a block device with the help of bdrv_write_zeroes.
2723 * The operation is sped up by checking the block status and only writing
2724 * zeroes to the device if they currently do not return zeroes. Optional
2725 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2726 *
2727 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2728 */
2729int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2730{
2731 int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
2732 int64_t ret, nb_sectors, sector_num = 0;
2733 int n;
2734
2735 for (;;) {
2736 nb_sectors = target_size - sector_num;
2737 if (nb_sectors <= 0) {
2738 return 0;
2739 }
2740 if (nb_sectors > INT_MAX) {
2741 nb_sectors = INT_MAX;
2742 }
2743 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
Peter Lieven3d94ce62013-12-12 13:57:05 +01002744 if (ret < 0) {
2745 error_report("error getting block status at sector %" PRId64 ": %s",
2746 sector_num, strerror(-ret));
2747 return ret;
2748 }
Peter Lievend75cbb52013-10-24 12:07:03 +02002749 if (ret & BDRV_BLOCK_ZERO) {
2750 sector_num += n;
2751 continue;
2752 }
2753 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2754 if (ret < 0) {
2755 error_report("error writing zeroes at sector %" PRId64 ": %s",
2756 sector_num, strerror(-ret));
2757 return ret;
2758 }
2759 sector_num += n;
2760 }
2761}
2762
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002763int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
bellard83f64092006-08-01 16:21:11 +00002764{
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002765 QEMUIOVector qiov;
2766 struct iovec iov = {
2767 .iov_base = (void *)buf,
2768 .iov_len = bytes,
2769 };
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002770 int ret;
bellard83f64092006-08-01 16:21:11 +00002771
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002772 if (bytes < 0) {
2773 return -EINVAL;
bellard83f64092006-08-01 16:21:11 +00002774 }
2775
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002776 qemu_iovec_init_external(&qiov, &iov, 1);
2777 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2778 if (ret < 0) {
2779 return ret;
bellard83f64092006-08-01 16:21:11 +00002780 }
2781
Kevin Wolfa3ef6572013-12-05 12:29:59 +01002782 return bytes;
bellard83f64092006-08-01 16:21:11 +00002783}
2784
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002785int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
bellard83f64092006-08-01 16:21:11 +00002786{
Kevin Wolf9a8c4cc2010-01-20 15:03:02 +01002787 int ret;
bellard83f64092006-08-01 16:21:11 +00002788
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002789 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2790 if (ret < 0) {
2791 return ret;
bellard83f64092006-08-01 16:21:11 +00002792 }
2793
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002794 return qiov->size;
2795}
2796
2797int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002798 const void *buf, int bytes)
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002799{
2800 QEMUIOVector qiov;
2801 struct iovec iov = {
2802 .iov_base = (void *) buf,
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002803 .iov_len = bytes,
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002804 };
2805
Kevin Wolf8407d5d2013-12-05 12:34:02 +01002806 if (bytes < 0) {
2807 return -EINVAL;
2808 }
2809
Kevin Wolf8d3b1a22013-04-05 21:27:55 +02002810 qemu_iovec_init_external(&qiov, &iov, 1);
2811 return bdrv_pwritev(bs, offset, &qiov);
bellard83f64092006-08-01 16:21:11 +00002812}
bellard83f64092006-08-01 16:21:11 +00002813
Kevin Wolff08145f2010-06-16 16:38:15 +02002814/*
2815 * Writes to the file and ensures that no writes are reordered across this
2816 * request (acts as a barrier)
2817 *
2818 * Returns 0 on success, -errno in error cases.
2819 */
2820int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2821 const void *buf, int count)
2822{
2823 int ret;
2824
2825 ret = bdrv_pwrite(bs, offset, buf, count);
2826 if (ret < 0) {
2827 return ret;
2828 }
2829
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002830 /* No flush needed for cache modes that already do it */
2831 if (bs->enable_write_cache) {
Kevin Wolff08145f2010-06-16 16:38:15 +02002832 bdrv_flush(bs);
2833 }
2834
2835 return 0;
2836}
2837
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002838static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002839 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2840{
2841 /* Perform I/O through a temporary buffer so that users who scribble over
2842 * their read buffer while the operation is in progress do not end up
2843 * modifying the image file. This is critical for zero-copy guest I/O
2844 * where anything might happen inside guest memory.
2845 */
2846 void *bounce_buffer;
2847
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002848 BlockDriver *drv = bs->drv;
Stefan Hajnocziab185922011-11-17 13:40:31 +00002849 struct iovec iov;
2850 QEMUIOVector bounce_qiov;
2851 int64_t cluster_sector_num;
2852 int cluster_nb_sectors;
2853 size_t skip_bytes;
2854 int ret;
2855
2856 /* Cover entire cluster so no additional backing file I/O is required when
2857 * allocating cluster in the image file.
2858 */
Paolo Bonzini343bded2013-01-21 17:09:42 +01002859 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
2860 &cluster_sector_num, &cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002861
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002862 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
2863 cluster_sector_num, cluster_nb_sectors);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002864
2865 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
2866 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
2867 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
2868
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002869 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
2870 &bounce_qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002871 if (ret < 0) {
2872 goto err;
2873 }
2874
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002875 if (drv->bdrv_co_write_zeroes &&
2876 buffer_is_zero(bounce_buffer, iov.iov_len)) {
Kevin Wolf621f0582012-03-20 15:12:58 +01002877 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02002878 cluster_nb_sectors, 0);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002879 } else {
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02002880 /* This does not change the data on the disk, it is not necessary
2881 * to flush even in cache=writethrough mode.
2882 */
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002883 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
Stefan Hajnocziab185922011-11-17 13:40:31 +00002884 &bounce_qiov);
Stefan Hajnoczi79c053b2012-02-07 13:27:26 +00002885 }
2886
Stefan Hajnocziab185922011-11-17 13:40:31 +00002887 if (ret < 0) {
2888 /* It might be okay to ignore write errors for guest requests. If this
2889 * is a deliberate copy-on-read then we don't want to ignore the error.
2890 * Simply report it in all cases.
2891 */
2892 goto err;
2893 }
2894
2895 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
Michael Tokarev03396142012-06-07 20:17:55 +04002896 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
2897 nb_sectors * BDRV_SECTOR_SIZE);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002898
2899err:
2900 qemu_vfree(bounce_buffer);
2901 return ret;
2902}
2903
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002904/*
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002905 * Forwards an already correctly aligned request to the BlockDriver. This
2906 * handles copy on read and zeroing after EOF; any other features must be
2907 * implemented by the caller.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01002908 */
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002909static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01002910 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
Kevin Wolfec746e12013-12-04 12:13:10 +01002911 int64_t align, QEMUIOVector *qiov, int flags)
Kevin Wolfda1fa912011-07-14 17:27:13 +02002912{
2913 BlockDriver *drv = bs->drv;
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002914 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002915
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002916 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
2917 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002918
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002919 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
2920 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
2921
2922 /* Handle Copy on Read and associated serialisation */
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002923 if (flags & BDRV_REQ_COPY_ON_READ) {
Kevin Wolf73271452013-12-04 17:08:50 +01002924 /* If we touch the same cluster it counts as an overlap. This
2925 * guarantees that allocating writes will be serialized and not race
2926 * with each other for the same cluster. For example, in copy-on-read
2927 * it ensures that the CoR read and write operations are atomic and
2928 * guest writes cannot interleave between them. */
2929 mark_request_serialising(req, bdrv_get_cluster_size(bs));
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002930 }
2931
Kevin Wolf2dbafdc2013-12-04 16:43:44 +01002932 wait_serialising_requests(req);
Stefan Hajnoczif4658282011-11-17 13:40:29 +00002933
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002934 if (flags & BDRV_REQ_COPY_ON_READ) {
Stefan Hajnocziab185922011-11-17 13:40:31 +00002935 int pnum;
2936
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02002937 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002938 if (ret < 0) {
2939 goto out;
2940 }
2941
2942 if (!ret || pnum != nb_sectors) {
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00002943 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
Stefan Hajnocziab185922011-11-17 13:40:31 +00002944 goto out;
2945 }
2946 }
2947
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002948 /* Forward the request to the BlockDriver */
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002949 if (!(bs->zero_beyond_eof && bs->growable)) {
2950 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
2951 } else {
2952 /* Read zeros after EOF of growable BDSes */
2953 int64_t len, total_sectors, max_nb_sectors;
2954
2955 len = bdrv_getlength(bs);
2956 if (len < 0) {
2957 ret = len;
2958 goto out;
2959 }
2960
Fam Zhengd055a1f2013-09-26 19:55:33 +08002961 total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
Kevin Wolf5f5bcd82014-02-07 16:00:09 +01002962 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
2963 align >> BDRV_SECTOR_BITS);
MORITA Kazutaka893a8f62013-08-06 09:53:40 +08002964 if (max_nb_sectors > 0) {
2965 ret = drv->bdrv_co_readv(bs, sector_num,
2966 MIN(nb_sectors, max_nb_sectors), qiov);
2967 } else {
2968 ret = 0;
2969 }
2970
2971 /* Reading beyond end of file is supposed to produce zeroes */
2972 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
2973 uint64_t offset = MAX(0, total_sectors - sector_num);
2974 uint64_t bytes = (sector_num + nb_sectors - offset) *
2975 BDRV_SECTOR_SIZE;
2976 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
2977 }
2978 }
Stefan Hajnocziab185922011-11-17 13:40:31 +00002979
2980out:
Stefan Hajnoczidbffbdc2011-11-17 13:40:27 +00002981 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02002982}
2983
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002984/*
2985 * Handle a read request in coroutine context
2986 */
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002987static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
2988 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfd0c7f642013-12-02 15:07:48 +01002989 BdrvRequestFlags flags)
2990{
2991 BlockDriver *drv = bs->drv;
Kevin Wolf65afd212013-12-03 14:55:55 +01002992 BdrvTrackedRequest req;
2993
Kevin Wolf1b0288a2013-12-02 16:09:46 +01002994 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
2995 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
2996 uint8_t *head_buf = NULL;
2997 uint8_t *tail_buf = NULL;
2998 QEMUIOVector local_qiov;
2999 bool use_local_qiov = false;
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003000 int ret;
3001
3002 if (!drv) {
3003 return -ENOMEDIUM;
3004 }
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003005 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003006 return -EIO;
3007 }
3008
3009 if (bs->copy_on_read) {
3010 flags |= BDRV_REQ_COPY_ON_READ;
3011 }
3012
3013 /* throttling disk I/O */
3014 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003015 bdrv_io_limits_intercept(bs, bytes, false);
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003016 }
3017
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003018 /* Align read if necessary by padding qiov */
3019 if (offset & (align - 1)) {
3020 head_buf = qemu_blockalign(bs, align);
3021 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3022 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3023 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3024 use_local_qiov = true;
3025
3026 bytes += offset & (align - 1);
3027 offset = offset & ~(align - 1);
3028 }
3029
3030 if ((offset + bytes) & (align - 1)) {
3031 if (!use_local_qiov) {
3032 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3033 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3034 use_local_qiov = true;
3035 }
3036 tail_buf = qemu_blockalign(bs, align);
3037 qemu_iovec_add(&local_qiov, tail_buf,
3038 align - ((offset + bytes) & (align - 1)));
3039
3040 bytes = ROUND_UP(bytes, align);
3041 }
3042
Kevin Wolf65afd212013-12-03 14:55:55 +01003043 tracked_request_begin(&req, bs, offset, bytes, false);
Kevin Wolfec746e12013-12-04 12:13:10 +01003044 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003045 use_local_qiov ? &local_qiov : qiov,
3046 flags);
Kevin Wolf65afd212013-12-03 14:55:55 +01003047 tracked_request_end(&req);
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003048
3049 if (use_local_qiov) {
3050 qemu_iovec_destroy(&local_qiov);
3051 qemu_vfree(head_buf);
3052 qemu_vfree(tail_buf);
3053 }
3054
Kevin Wolfd0c7f642013-12-02 15:07:48 +01003055 return ret;
3056}
3057
Kevin Wolf1b0288a2013-12-02 16:09:46 +01003058static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3059 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3060 BdrvRequestFlags flags)
3061{
3062 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3063 return -EINVAL;
3064 }
3065
3066 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3067 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3068}
3069
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003070int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
Kevin Wolfda1fa912011-07-14 17:27:13 +02003071 int nb_sectors, QEMUIOVector *qiov)
3072{
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003073 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003074
Stefan Hajnoczi470c0502012-01-18 14:40:42 +00003075 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3076}
3077
3078int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3079 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3080{
3081 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3082
3083 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3084 BDRV_REQ_COPY_ON_READ);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003085}
3086
/*
 * If no limit is specified in the BlockLimits, use a default
 * of 32768 512-byte sectors (16 MiB) per request.
 */
#define MAX_WRITE_ZEROES_DEFAULT 32768
3091
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003092static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003093 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003094{
3095 BlockDriver *drv = bs->drv;
3096 QEMUIOVector qiov;
Peter Lievenc31cb702013-10-24 12:06:58 +02003097 struct iovec iov = {0};
3098 int ret = 0;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003099
Peter Lievenc31cb702013-10-24 12:06:58 +02003100 int max_write_zeroes = bs->bl.max_write_zeroes ?
3101 bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
Kevin Wolf621f0582012-03-20 15:12:58 +01003102
Peter Lievenc31cb702013-10-24 12:06:58 +02003103 while (nb_sectors > 0 && !ret) {
3104 int num = nb_sectors;
3105
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003106 /* Align request. Block drivers can expect the "bulk" of the request
3107 * to be aligned.
3108 */
3109 if (bs->bl.write_zeroes_alignment
3110 && num > bs->bl.write_zeroes_alignment) {
3111 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3112 /* Make a small request up to the first aligned sector. */
Peter Lievenc31cb702013-10-24 12:06:58 +02003113 num = bs->bl.write_zeroes_alignment;
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003114 num -= sector_num % bs->bl.write_zeroes_alignment;
3115 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3116 /* Shorten the request to the last aligned sector. num cannot
3117 * underflow because num > bs->bl.write_zeroes_alignment.
3118 */
3119 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
Peter Lievenc31cb702013-10-24 12:06:58 +02003120 }
Kevin Wolf621f0582012-03-20 15:12:58 +01003121 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003122
3123 /* limit request size */
3124 if (num > max_write_zeroes) {
3125 num = max_write_zeroes;
3126 }
3127
3128 ret = -ENOTSUP;
3129 /* First try the efficient write zeroes operation */
3130 if (drv->bdrv_co_write_zeroes) {
3131 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3132 }
3133
3134 if (ret == -ENOTSUP) {
3135 /* Fall back to bounce buffer if write zeroes is unsupported */
3136 iov.iov_len = num * BDRV_SECTOR_SIZE;
3137 if (iov.iov_base == NULL) {
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003138 iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
3139 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
Peter Lievenc31cb702013-10-24 12:06:58 +02003140 }
3141 qemu_iovec_init_external(&qiov, &iov, 1);
3142
3143 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
Paolo Bonzinib8d71c02013-11-22 13:39:48 +01003144
3145 /* Keep bounce buffer around if it is big enough for all
3146 * all future requests.
3147 */
3148 if (num < max_write_zeroes) {
3149 qemu_vfree(iov.iov_base);
3150 iov.iov_base = NULL;
3151 }
Peter Lievenc31cb702013-10-24 12:06:58 +02003152 }
3153
3154 sector_num += num;
3155 nb_sectors -= num;
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003156 }
3157
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003158 qemu_vfree(iov.iov_base);
3159 return ret;
3160}
3161
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003162/*
Kevin Wolfb404f722013-12-03 14:02:23 +01003163 * Forwards an already correctly aligned write request to the BlockDriver.
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003164 */
Kevin Wolfb404f722013-12-03 14:02:23 +01003165static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
Kevin Wolf65afd212013-12-03 14:55:55 +01003166 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3167 QEMUIOVector *qiov, int flags)
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003168{
3169 BlockDriver *drv = bs->drv;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003170 bool waited;
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003171 int ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003172
Kevin Wolfb404f722013-12-03 14:02:23 +01003173 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3174 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003175
Kevin Wolfb404f722013-12-03 14:02:23 +01003176 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3177 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Benoît Canetcc0681c2013-09-02 14:14:39 +02003178
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003179 waited = wait_serialising_requests(req);
3180 assert(!waited || !req->serialising);
Kevin Wolfaf91f9a2014-02-07 15:35:56 +01003181 assert(req->overlap_offset <= offset);
3182 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
Kevin Wolf244eade2013-12-03 14:30:44 +01003183
Kevin Wolf65afd212013-12-03 14:55:55 +01003184 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Stefan Hajnoczid616b222013-06-24 17:13:10 +02003185
3186 if (ret < 0) {
3187 /* Do nothing, write notifier decided to fail this request */
3188 } else if (flags & BDRV_REQ_ZERO_WRITE) {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003189 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003190 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003191 } else {
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003192 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003193 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3194 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003195 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003196
Paolo Bonzinif05fa4a2012-06-06 00:04:49 +02003197 if (ret == 0 && !bs->enable_write_cache) {
3198 ret = bdrv_co_flush(bs);
3199 }
3200
Fam Zhenge4654d22013-11-13 18:29:43 +08003201 bdrv_set_dirty(bs, sector_num, nb_sectors);
Kevin Wolfda1fa912011-07-14 17:27:13 +02003202
3203 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
3204 bs->wr_highest_sector = sector_num + nb_sectors - 1;
3205 }
Paolo Bonzinidf2a6f22013-09-04 19:00:21 +02003206 if (bs->growable && ret >= 0) {
3207 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3208 }
Kevin Wolfda1fa912011-07-14 17:27:13 +02003209
Stefan Hajnoczi6b7cb242011-10-13 13:08:24 +01003210 return ret;
Kevin Wolfda1fa912011-07-14 17:27:13 +02003211}
3212
Kevin Wolfb404f722013-12-03 14:02:23 +01003213/*
3214 * Handle a write request in coroutine context
3215 */
Kevin Wolf66015532013-12-03 14:40:18 +01003216static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3217 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
Kevin Wolfb404f722013-12-03 14:02:23 +01003218 BdrvRequestFlags flags)
3219{
Kevin Wolf65afd212013-12-03 14:55:55 +01003220 BdrvTrackedRequest req;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003221 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3222 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3223 uint8_t *head_buf = NULL;
3224 uint8_t *tail_buf = NULL;
3225 QEMUIOVector local_qiov;
3226 bool use_local_qiov = false;
Kevin Wolfb404f722013-12-03 14:02:23 +01003227 int ret;
3228
3229 if (!bs->drv) {
3230 return -ENOMEDIUM;
3231 }
3232 if (bs->read_only) {
3233 return -EACCES;
3234 }
Kevin Wolf66015532013-12-03 14:40:18 +01003235 if (bdrv_check_byte_request(bs, offset, bytes)) {
Kevin Wolfb404f722013-12-03 14:02:23 +01003236 return -EIO;
3237 }
3238
Kevin Wolfb404f722013-12-03 14:02:23 +01003239 /* throttling disk I/O */
3240 if (bs->io_limits_enabled) {
Kevin Wolfd5103582014-01-16 13:29:10 +01003241 bdrv_io_limits_intercept(bs, bytes, true);
Kevin Wolfb404f722013-12-03 14:02:23 +01003242 }
3243
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003244 /*
3245 * Align write if necessary by performing a read-modify-write cycle.
3246 * Pad qiov with the read parts and be sure to have a tracked request not
3247 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3248 */
Kevin Wolf65afd212013-12-03 14:55:55 +01003249 tracked_request_begin(&req, bs, offset, bytes, true);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003250
3251 if (offset & (align - 1)) {
3252 QEMUIOVector head_qiov;
3253 struct iovec head_iov;
3254
3255 mark_request_serialising(&req, align);
3256 wait_serialising_requests(&req);
3257
3258 head_buf = qemu_blockalign(bs, align);
3259 head_iov = (struct iovec) {
3260 .iov_base = head_buf,
3261 .iov_len = align,
3262 };
3263 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3264
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003265 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003266 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3267 align, &head_qiov, 0);
3268 if (ret < 0) {
3269 goto fail;
3270 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003271 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003272
3273 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3274 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3275 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3276 use_local_qiov = true;
3277
3278 bytes += offset & (align - 1);
3279 offset = offset & ~(align - 1);
3280 }
3281
3282 if ((offset + bytes) & (align - 1)) {
3283 QEMUIOVector tail_qiov;
3284 struct iovec tail_iov;
3285 size_t tail_bytes;
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003286 bool waited;
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003287
3288 mark_request_serialising(&req, align);
Kevin Wolf28de2dc2014-01-14 11:41:35 +01003289 waited = wait_serialising_requests(&req);
3290 assert(!waited || !use_local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003291
3292 tail_buf = qemu_blockalign(bs, align);
3293 tail_iov = (struct iovec) {
3294 .iov_base = tail_buf,
3295 .iov_len = align,
3296 };
3297 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3298
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003299 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003300 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3301 align, &tail_qiov, 0);
3302 if (ret < 0) {
3303 goto fail;
3304 }
Kevin Wolf9e1cb962014-01-14 15:37:03 +01003305 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003306
3307 if (!use_local_qiov) {
3308 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3309 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3310 use_local_qiov = true;
3311 }
3312
3313 tail_bytes = (offset + bytes) & (align - 1);
3314 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3315
3316 bytes = ROUND_UP(bytes, align);
3317 }
3318
3319 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3320 use_local_qiov ? &local_qiov : qiov,
3321 flags);
3322
3323fail:
Kevin Wolf65afd212013-12-03 14:55:55 +01003324 tracked_request_end(&req);
Kevin Wolfb404f722013-12-03 14:02:23 +01003325
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003326 if (use_local_qiov) {
3327 qemu_iovec_destroy(&local_qiov);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003328 }
Kevin Wolf99c4a852014-02-07 15:29:00 +01003329 qemu_vfree(head_buf);
3330 qemu_vfree(tail_buf);
Kevin Wolf3b8242e2013-12-03 16:34:41 +01003331
Kevin Wolfb404f722013-12-03 14:02:23 +01003332 return ret;
3333}
3334
Kevin Wolf66015532013-12-03 14:40:18 +01003335static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3336 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3337 BdrvRequestFlags flags)
3338{
3339 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3340 return -EINVAL;
3341 }
3342
3343 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3344 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3345}
3346
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003347int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3348 int nb_sectors, QEMUIOVector *qiov)
3349{
3350 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3351
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003352 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3353}
3354
3355int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003356 int64_t sector_num, int nb_sectors,
3357 BdrvRequestFlags flags)
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003358{
Paolo Bonzini94d6ff22013-11-22 13:39:45 +01003359 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003360
Peter Lievend32f35c2013-10-24 12:06:52 +02003361 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3362 flags &= ~BDRV_REQ_MAY_UNMAP;
3363 }
3364
Stefan Hajnoczif08f2dd2012-02-07 13:27:25 +00003365 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
Peter Lievenaa7bfbf2013-10-24 12:06:51 +02003366 BDRV_REQ_ZERO_WRITE | flags);
Stefan Hajnoczic5fbe572011-10-05 17:17:03 +01003367}
3368
bellard83f64092006-08-01 16:21:11 +00003369/**
bellard83f64092006-08-01 16:21:11 +00003370 * Truncate file to 'offset' bytes (needed only for file protocols)
3371 */
3372int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3373{
3374 BlockDriver *drv = bs->drv;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003375 int ret;
bellard83f64092006-08-01 16:21:11 +00003376 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003377 return -ENOMEDIUM;
bellard83f64092006-08-01 16:21:11 +00003378 if (!drv->bdrv_truncate)
3379 return -ENOTSUP;
Naphtali Sprei59f26892009-10-26 16:25:16 +02003380 if (bs->read_only)
3381 return -EACCES;
Marcelo Tosatti85916752011-01-26 12:12:35 -02003382 if (bdrv_in_use(bs))
3383 return -EBUSY;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003384 ret = drv->bdrv_truncate(bs, offset);
3385 if (ret == 0) {
3386 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
Markus Armbruster145feb12011-08-03 15:07:42 +02003387 bdrv_dev_resize_cb(bs);
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003388 }
3389 return ret;
bellard83f64092006-08-01 16:21:11 +00003390}
3391
3392/**
Fam Zheng4a1d5e12011-07-12 19:56:39 +08003393 * Length of a allocated file in bytes. Sparse files are counted by actual
3394 * allocated space. Return < 0 if error or unknown.
3395 */
3396int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3397{
3398 BlockDriver *drv = bs->drv;
3399 if (!drv) {
3400 return -ENOMEDIUM;
3401 }
3402 if (drv->bdrv_get_allocated_file_size) {
3403 return drv->bdrv_get_allocated_file_size(bs);
3404 }
3405 if (bs->file) {
3406 return bdrv_get_allocated_file_size(bs->file);
3407 }
3408 return -ENOTSUP;
3409}
3410
3411/**
bellard83f64092006-08-01 16:21:11 +00003412 * Length of a file in bytes. Return < 0 if error or unknown.
3413 */
3414int64_t bdrv_getlength(BlockDriverState *bs)
3415{
3416 BlockDriver *drv = bs->drv;
3417 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003418 return -ENOMEDIUM;
Stefan Hajnoczi51762282010-04-19 16:56:41 +01003419
Kevin Wolfb94a2612013-10-29 12:18:58 +01003420 if (drv->has_variable_length) {
3421 int ret = refresh_total_sectors(bs, bs->total_sectors);
3422 if (ret < 0) {
3423 return ret;
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003424 }
bellard83f64092006-08-01 16:21:11 +00003425 }
Stefan Hajnoczi46a4e4e2011-03-29 20:04:41 +01003426 return bs->total_sectors * BDRV_SECTOR_SIZE;
bellardfc01f7e2003-06-30 10:03:06 +00003427}
3428
bellard19cb3732006-08-19 11:45:59 +00003429/* return 0 as number of sectors if no device present or error */
ths96b8f132007-12-17 01:35:20 +00003430void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
bellardfc01f7e2003-06-30 10:03:06 +00003431{
bellard19cb3732006-08-19 11:45:59 +00003432 int64_t length;
3433 length = bdrv_getlength(bs);
3434 if (length < 0)
3435 length = 0;
3436 else
Jan Kiszka6ea44302009-11-30 18:21:19 +01003437 length = length >> BDRV_SECTOR_BITS;
bellard19cb3732006-08-19 11:45:59 +00003438 *nb_sectors_ptr = length;
bellardfc01f7e2003-06-30 10:03:06 +00003439}
bellardcf989512004-02-16 21:56:36 +00003440
Paolo Bonziniff06f5f2012-09-28 17:22:54 +02003441void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3442 BlockdevOnError on_write_error)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003443{
3444 bs->on_read_error = on_read_error;
3445 bs->on_write_error = on_write_error;
3446}
3447
Paolo Bonzini1ceee0d2012-09-28 17:22:56 +02003448BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
Markus Armbrusterabd7f682010-06-02 18:55:17 +02003449{
3450 return is_read ? bs->on_read_error : bs->on_write_error;
3451}
3452
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003453BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3454{
3455 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3456
3457 switch (on_err) {
3458 case BLOCKDEV_ON_ERROR_ENOSPC:
3459 return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
3460 case BLOCKDEV_ON_ERROR_STOP:
3461 return BDRV_ACTION_STOP;
3462 case BLOCKDEV_ON_ERROR_REPORT:
3463 return BDRV_ACTION_REPORT;
3464 case BLOCKDEV_ON_ERROR_IGNORE:
3465 return BDRV_ACTION_IGNORE;
3466 default:
3467 abort();
3468 }
3469}
3470
3471/* This is done by device models because, while the block layer knows
3472 * about the error, it does not know whether an operation comes from
3473 * the device or the block layer (from a job, for example).
3474 */
3475void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3476 bool is_read, int error)
3477{
3478 assert(error >= 0);
Paolo Bonzini32c81a42012-09-28 17:22:58 +02003479 bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02003480 if (action == BDRV_ACTION_STOP) {
3481 vm_stop(RUN_STATE_IO_ERROR);
3482 bdrv_iostatus_set_err(bs, error);
3483 }
3484}
3485
bellardb3380822004-03-14 21:38:54 +00003486int bdrv_is_read_only(BlockDriverState *bs)
3487{
3488 return bs->read_only;
3489}
3490
ths985a03b2007-12-24 16:10:43 +00003491int bdrv_is_sg(BlockDriverState *bs)
3492{
3493 return bs->sg;
3494}
3495
Christoph Hellwige900a7b2009-09-04 19:01:15 +02003496int bdrv_enable_write_cache(BlockDriverState *bs)
3497{
3498 return bs->enable_write_cache;
3499}
3500
Paolo Bonzini425b0142012-06-06 00:04:52 +02003501void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3502{
3503 bs->enable_write_cache = wce;
Jeff Cody55b110f2012-09-20 15:13:18 -04003504
3505 /* so a reopen() will preserve wce */
3506 if (wce) {
3507 bs->open_flags |= BDRV_O_CACHE_WB;
3508 } else {
3509 bs->open_flags &= ~BDRV_O_CACHE_WB;
3510 }
Paolo Bonzini425b0142012-06-06 00:04:52 +02003511}
3512
bellardea2384d2004-08-01 21:59:26 +00003513int bdrv_is_encrypted(BlockDriverState *bs)
3514{
3515 if (bs->backing_hd && bs->backing_hd->encrypted)
3516 return 1;
3517 return bs->encrypted;
3518}
3519
aliguoric0f4ce72009-03-05 23:01:01 +00003520int bdrv_key_required(BlockDriverState *bs)
3521{
3522 BlockDriverState *backing_hd = bs->backing_hd;
3523
3524 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3525 return 1;
3526 return (bs->encrypted && !bs->valid_key);
3527}
3528
bellardea2384d2004-08-01 21:59:26 +00003529int bdrv_set_key(BlockDriverState *bs, const char *key)
3530{
3531 int ret;
3532 if (bs->backing_hd && bs->backing_hd->encrypted) {
3533 ret = bdrv_set_key(bs->backing_hd, key);
3534 if (ret < 0)
3535 return ret;
3536 if (!bs->encrypted)
3537 return 0;
3538 }
Shahar Havivifd04a2a2010-03-06 00:26:13 +02003539 if (!bs->encrypted) {
3540 return -EINVAL;
3541 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3542 return -ENOMEDIUM;
3543 }
aliguoric0f4ce72009-03-05 23:01:01 +00003544 ret = bs->drv->bdrv_set_key(bs, key);
aliguoribb5fc202009-03-05 23:01:15 +00003545 if (ret < 0) {
3546 bs->valid_key = 0;
3547 } else if (!bs->valid_key) {
3548 bs->valid_key = 1;
3549 /* call the change callback now, we skipped it on open */
Markus Armbruster7d4b4ba2011-09-06 18:58:59 +02003550 bdrv_dev_change_media_cb(bs, true);
aliguoribb5fc202009-03-05 23:01:15 +00003551 }
aliguoric0f4ce72009-03-05 23:01:01 +00003552 return ret;
bellardea2384d2004-08-01 21:59:26 +00003553}
3554
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003555const char *bdrv_get_format_name(BlockDriverState *bs)
bellardea2384d2004-08-01 21:59:26 +00003556{
Markus Armbrusterf8d6bba2012-06-13 10:11:48 +02003557 return bs->drv ? bs->drv->format_name : NULL;
bellardea2384d2004-08-01 21:59:26 +00003558}
3559
ths5fafdf22007-09-16 21:08:06 +00003560void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
bellardea2384d2004-08-01 21:59:26 +00003561 void *opaque)
3562{
3563 BlockDriver *drv;
3564
Stefan Hajnoczi8a22f022010-04-13 10:29:33 +01003565 QLIST_FOREACH(drv, &bdrv_drivers, list) {
bellardea2384d2004-08-01 21:59:26 +00003566 it(opaque, drv->format_name);
3567 }
3568}
3569
Benoît Canetdc364f42014-01-23 21:31:32 +01003570/* This function is to find block backend bs */
bellardb3380822004-03-14 21:38:54 +00003571BlockDriverState *bdrv_find(const char *name)
3572{
3573 BlockDriverState *bs;
3574
Benoît Canetdc364f42014-01-23 21:31:32 +01003575 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003576 if (!strcmp(name, bs->device_name)) {
bellardb3380822004-03-14 21:38:54 +00003577 return bs;
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003578 }
bellardb3380822004-03-14 21:38:54 +00003579 }
3580 return NULL;
3581}
3582
Benoît Canetdc364f42014-01-23 21:31:32 +01003583/* This function is to find a node in the bs graph */
3584BlockDriverState *bdrv_find_node(const char *node_name)
3585{
3586 BlockDriverState *bs;
3587
3588 assert(node_name);
3589
3590 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3591 if (!strcmp(node_name, bs->node_name)) {
3592 return bs;
3593 }
3594 }
3595 return NULL;
3596}
3597
Benoît Canetc13163f2014-01-23 21:31:34 +01003598/* Put this QMP function here so it can access the static graph_bdrv_states. */
3599BlockDeviceInfoList *bdrv_named_nodes_list(void)
3600{
3601 BlockDeviceInfoList *list, *entry;
3602 BlockDriverState *bs;
3603
3604 list = NULL;
3605 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3606 entry = g_malloc0(sizeof(*entry));
3607 entry->value = bdrv_block_device_info(bs);
3608 entry->next = list;
3609 list = entry;
3610 }
3611
3612 return list;
3613}
3614
Benoît Canet12d3ba82014-01-23 21:31:35 +01003615BlockDriverState *bdrv_lookup_bs(const char *device,
3616 const char *node_name,
3617 Error **errp)
3618{
3619 BlockDriverState *bs = NULL;
3620
Benoît Canet12d3ba82014-01-23 21:31:35 +01003621 if (device) {
3622 bs = bdrv_find(device);
3623
Benoît Canetdd67fa52014-02-12 17:15:06 +01003624 if (bs) {
3625 return bs;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003626 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003627 }
3628
Benoît Canetdd67fa52014-02-12 17:15:06 +01003629 if (node_name) {
3630 bs = bdrv_find_node(node_name);
Benoît Canet12d3ba82014-01-23 21:31:35 +01003631
Benoît Canetdd67fa52014-02-12 17:15:06 +01003632 if (bs) {
3633 return bs;
3634 }
Benoît Canet12d3ba82014-01-23 21:31:35 +01003635 }
3636
Benoît Canetdd67fa52014-02-12 17:15:06 +01003637 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3638 device ? device : "",
3639 node_name ? node_name : "");
3640 return NULL;
Benoît Canet12d3ba82014-01-23 21:31:35 +01003641}
3642
Markus Armbruster2f399b02010-06-02 18:55:20 +02003643BlockDriverState *bdrv_next(BlockDriverState *bs)
3644{
3645 if (!bs) {
3646 return QTAILQ_FIRST(&bdrv_states);
3647 }
Benoît Canetdc364f42014-01-23 21:31:32 +01003648 return QTAILQ_NEXT(bs, device_list);
Markus Armbruster2f399b02010-06-02 18:55:20 +02003649}
3650
aliguori51de9762009-03-05 23:00:43 +00003651void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
bellard81d09122004-07-14 17:21:37 +00003652{
3653 BlockDriverState *bs;
3654
Benoît Canetdc364f42014-01-23 21:31:32 +01003655 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
aliguori51de9762009-03-05 23:00:43 +00003656 it(opaque, bs);
bellard81d09122004-07-14 17:21:37 +00003657 }
3658}
3659
bellardea2384d2004-08-01 21:59:26 +00003660const char *bdrv_get_device_name(BlockDriverState *bs)
3661{
3662 return bs->device_name;
3663}
3664
Markus Armbrusterc8433282012-06-05 16:49:24 +02003665int bdrv_get_flags(BlockDriverState *bs)
3666{
3667 return bs->open_flags;
3668}
3669
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003670int bdrv_flush_all(void)
aliguoric6ca28d2008-10-06 13:55:43 +00003671{
3672 BlockDriverState *bs;
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003673 int result = 0;
aliguoric6ca28d2008-10-06 13:55:43 +00003674
Benoît Canetdc364f42014-01-23 21:31:32 +01003675 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003676 int ret = bdrv_flush(bs);
3677 if (ret < 0 && !result) {
3678 result = ret;
3679 }
Stefan Hajnoczi1b7bdbc2010-04-10 07:02:42 +01003680 }
Kevin Wolff0f0fdf2013-07-05 13:48:01 +02003681
3682 return result;
aliguoric6ca28d2008-10-06 13:55:43 +00003683}
3684
Peter Lieven3ac21622013-06-28 12:47:42 +02003685int bdrv_has_zero_init_1(BlockDriverState *bs)
3686{
3687 return 1;
3688}
3689
Kevin Wolff2feebb2010-04-14 17:30:35 +02003690int bdrv_has_zero_init(BlockDriverState *bs)
3691{
3692 assert(bs->drv);
3693
Paolo Bonzini11212d82013-09-04 19:00:27 +02003694 /* If BS is a copy on write image, it is initialized to
3695 the contents of the base image, which may not be zeroes. */
3696 if (bs->backing_hd) {
3697 return 0;
3698 }
Kevin Wolf336c1c12010-07-28 11:26:29 +02003699 if (bs->drv->bdrv_has_zero_init) {
3700 return bs->drv->bdrv_has_zero_init(bs);
Kevin Wolff2feebb2010-04-14 17:30:35 +02003701 }
3702
Peter Lieven3ac21622013-06-28 12:47:42 +02003703 /* safe default */
3704 return 0;
Kevin Wolff2feebb2010-04-14 17:30:35 +02003705}
3706
Peter Lieven4ce78692013-10-24 12:06:54 +02003707bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3708{
3709 BlockDriverInfo bdi;
3710
3711 if (bs->backing_hd) {
3712 return false;
3713 }
3714
3715 if (bdrv_get_info(bs, &bdi) == 0) {
3716 return bdi.unallocated_blocks_are_zero;
3717 }
3718
3719 return false;
3720}
3721
3722bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3723{
3724 BlockDriverInfo bdi;
3725
3726 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3727 return false;
3728 }
3729
3730 if (bdrv_get_info(bs, &bdi) == 0) {
3731 return bdi.can_write_zeroes_with_unmap;
3732 }
3733
3734 return false;
3735}
3736
/*
 * Bundle of arguments and results for a block status query.
 * NOTE(review): presumably used to hand a bdrv_co_get_block_status()-style
 * call to a coroutine entry function -- the users of this struct are not
 * visible in this part of the file, confirm against them.
 */
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;
    BlockDriverState *base;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;
    int64_t ret;
    bool done;
} BdrvCoGetBlockStatusData;
Stefan Hajnoczi376ae3f2011-11-14 12:44:19 +00003746
thsf58c7b32008-06-05 21:53:49 +00003747/*
3748 * Returns true iff the specified sector is present in the disk image. Drivers
3749 * not implementing the functionality are assumed to not support backing files,
3750 * hence all their sectors are reported as allocated.
3751 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003752 * If 'sector_num' is beyond the end of the disk image the return value is 0
3753 * and 'pnum' is set to 0.
3754 *
thsf58c7b32008-06-05 21:53:49 +00003755 * 'pnum' is set to the number of sectors (including and immediately following
3756 * the specified sector) that are known to be in the same
3757 * allocated/unallocated state.
3758 *
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003759 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
3760 * beyond the end of the disk image it will be clamped.
thsf58c7b32008-06-05 21:53:49 +00003761 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003762static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
3763 int64_t sector_num,
3764 int nb_sectors, int *pnum)
thsf58c7b32008-06-05 21:53:49 +00003765{
Paolo Bonzini617ccb42013-09-04 19:00:23 +02003766 int64_t length;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003767 int64_t n;
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02003768 int64_t ret, ret2;
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003769
Paolo Bonzini617ccb42013-09-04 19:00:23 +02003770 length = bdrv_getlength(bs);
3771 if (length < 0) {
3772 return length;
3773 }
3774
3775 if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003776 *pnum = 0;
3777 return 0;
3778 }
3779
3780 n = bs->total_sectors - sector_num;
3781 if (n < nb_sectors) {
3782 nb_sectors = n;
3783 }
3784
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003785 if (!bs->drv->bdrv_co_get_block_status) {
Stefan Hajnoczibd9533e2011-11-29 13:49:51 +00003786 *pnum = nb_sectors;
Paolo Bonzini918e92d2013-09-04 19:00:37 +02003787 ret = BDRV_BLOCK_DATA;
3788 if (bs->drv->protocol_name) {
3789 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
3790 }
3791 return ret;
thsf58c7b32008-06-05 21:53:49 +00003792 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003793
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003794 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
3795 if (ret < 0) {
Peter Lieven3e0a2332013-09-24 15:35:08 +02003796 *pnum = 0;
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003797 return ret;
3798 }
3799
Peter Lieven92bc50a2013-10-08 14:43:14 +02003800 if (ret & BDRV_BLOCK_RAW) {
3801 assert(ret & BDRV_BLOCK_OFFSET_VALID);
3802 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3803 *pnum, pnum);
3804 }
3805
Peter Lievenc3d86882013-10-24 12:07:04 +02003806 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
3807 if (bdrv_unallocated_blocks_are_zero(bs)) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02003808 ret |= BDRV_BLOCK_ZERO;
Peter Lieven1f9db222013-09-24 15:35:09 +02003809 } else if (bs->backing_hd) {
Paolo Bonzinif0ad5712013-09-04 19:00:32 +02003810 BlockDriverState *bs2 = bs->backing_hd;
3811 int64_t length2 = bdrv_getlength(bs2);
3812 if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
3813 ret |= BDRV_BLOCK_ZERO;
3814 }
3815 }
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003816 }
Paolo Bonzini5daa74a2013-09-04 19:00:38 +02003817
3818 if (bs->file &&
3819 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
3820 (ret & BDRV_BLOCK_OFFSET_VALID)) {
3821 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
3822 *pnum, pnum);
3823 if (ret2 >= 0) {
3824 /* Ignore errors. This is just providing extra information, it
3825 * is useful but not necessary.
3826 */
3827 ret |= (ret2 & BDRV_BLOCK_ZERO);
3828 }
3829 }
3830
Paolo Bonzini415b5b02013-09-04 19:00:31 +02003831 return ret;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003832}
3833
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003834/* Coroutine wrapper for bdrv_get_block_status() */
3835static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003836{
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003837 BdrvCoGetBlockStatusData *data = opaque;
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003838 BlockDriverState *bs = data->bs;
3839
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003840 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
3841 data->pnum);
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003842 data->done = true;
3843}
3844
3845/*
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003846 * Synchronous wrapper around bdrv_co_get_block_status().
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003847 *
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003848 * See bdrv_co_get_block_status() for details.
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003849 */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003850int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
3851 int nb_sectors, int *pnum)
Stefan Hajnoczi060f51c2011-11-14 12:44:26 +00003852{
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003853 Coroutine *co;
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003854 BdrvCoGetBlockStatusData data = {
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003855 .bs = bs,
3856 .sector_num = sector_num,
3857 .nb_sectors = nb_sectors,
3858 .pnum = pnum,
3859 .done = false,
3860 };
3861
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003862 if (qemu_in_coroutine()) {
3863 /* Fast-path if already in coroutine context */
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003864 bdrv_get_block_status_co_entry(&data);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003865 } else {
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003866 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003867 qemu_coroutine_enter(co, &data);
3868 while (!data.done) {
3869 qemu_aio_wait();
3870 }
Stefan Hajnoczi6aebab12011-11-14 12:44:25 +00003871 }
3872 return data.ret;
thsf58c7b32008-06-05 21:53:49 +00003873}
3874
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003875int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
3876 int nb_sectors, int *pnum)
3877{
Paolo Bonzini4333bb72013-09-04 19:00:29 +02003878 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
3879 if (ret < 0) {
3880 return ret;
3881 }
3882 return
3883 (ret & BDRV_BLOCK_DATA) ||
3884 ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs));
Paolo Bonzinib6b8a332013-09-04 19:00:28 +02003885}
3886
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003887/*
3888 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
3889 *
3890 * Return true if the given sector is allocated in any image between
3891 * BASE and TOP (inclusive). BASE can be NULL to check if the given
3892 * sector is allocated in any image of the chain. Return false otherwise.
3893 *
3894 * 'pnum' is set to the number of sectors (including and immediately following
3895 * the specified sector) that are known to be in the same
3896 * allocated/unallocated state.
3897 *
3898 */
Paolo Bonzini4f578632013-09-04 19:00:24 +02003899int bdrv_is_allocated_above(BlockDriverState *top,
3900 BlockDriverState *base,
3901 int64_t sector_num,
3902 int nb_sectors, int *pnum)
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003903{
3904 BlockDriverState *intermediate;
3905 int ret, n = nb_sectors;
3906
3907 intermediate = top;
3908 while (intermediate && intermediate != base) {
3909 int pnum_inter;
Paolo Bonzinibdad13b2013-09-04 19:00:22 +02003910 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
3911 &pnum_inter);
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003912 if (ret < 0) {
3913 return ret;
3914 } else if (ret) {
3915 *pnum = pnum_inter;
3916 return 1;
3917 }
3918
3919 /*
3920 * [sector_num, nb_sectors] is unallocated on top but intermediate
3921 * might have
3922 *
3923 * [sector_num+x, nr_sectors] allocated.
3924 */
Vishvananda Ishaya63ba17d2013-01-24 10:02:08 -08003925 if (n > pnum_inter &&
3926 (intermediate == top ||
3927 sector_num + pnum_inter < intermediate->total_sectors)) {
Paolo Bonzini188a7bb2012-05-08 16:52:01 +02003928 n = pnum_inter;
3929 }
3930
3931 intermediate = intermediate->backing_hd;
3932 }
3933
3934 *pnum = n;
3935 return 0;
3936}
3937
aliguori045df332009-03-05 23:00:48 +00003938const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3939{
3940 if (bs->backing_hd && bs->backing_hd->encrypted)
3941 return bs->backing_file;
3942 else if (bs->encrypted)
3943 return bs->filename;
3944 else
3945 return NULL;
3946}
3947
ths5fafdf22007-09-16 21:08:06 +00003948void bdrv_get_backing_filename(BlockDriverState *bs,
bellard83f64092006-08-01 16:21:11 +00003949 char *filename, int filename_size)
bellardea2384d2004-08-01 21:59:26 +00003950{
Kevin Wolf3574c602011-10-26 11:02:11 +02003951 pstrcpy(filename, filename_size, bs->backing_file);
bellardea2384d2004-08-01 21:59:26 +00003952}
3953
ths5fafdf22007-09-16 21:08:06 +00003954int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
bellardfaea38e2006-08-05 21:31:00 +00003955 const uint8_t *buf, int nb_sectors)
3956{
3957 BlockDriver *drv = bs->drv;
3958 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003959 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00003960 if (!drv->bdrv_write_compressed)
3961 return -ENOTSUP;
Kevin Wolffbb7b4e2009-05-08 14:47:24 +02003962 if (bdrv_check_request(bs, sector_num, nb_sectors))
3963 return -EIO;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003964
Fam Zhenge4654d22013-11-13 18:29:43 +08003965 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
Jan Kiszkaa55eb922009-11-30 18:21:19 +01003966
bellardfaea38e2006-08-05 21:31:00 +00003967 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
3968}
ths3b46e622007-09-17 08:09:54 +00003969
bellardfaea38e2006-08-05 21:31:00 +00003970int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3971{
3972 BlockDriver *drv = bs->drv;
3973 if (!drv)
bellard19cb3732006-08-19 11:45:59 +00003974 return -ENOMEDIUM;
bellardfaea38e2006-08-05 21:31:00 +00003975 if (!drv->bdrv_get_info)
3976 return -ENOTSUP;
3977 memset(bdi, 0, sizeof(*bdi));
3978 return drv->bdrv_get_info(bs, bdi);
3979}
3980
Max Reitzeae041f2013-10-09 10:46:16 +02003981ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3982{
3983 BlockDriver *drv = bs->drv;
3984 if (drv && drv->bdrv_get_specific_info) {
3985 return drv->bdrv_get_specific_info(bs);
3986 }
3987 return NULL;
3988}
3989
Christoph Hellwig45566e92009-07-10 23:11:57 +02003990int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
3991 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00003992{
Kevin Wolfcf8074b2013-04-05 21:27:53 +02003993 QEMUIOVector qiov;
3994 struct iovec iov = {
3995 .iov_base = (void *) buf,
3996 .iov_len = size,
3997 };
3998
3999 qemu_iovec_init_external(&qiov, &iov, 1);
4000 return bdrv_writev_vmstate(bs, &qiov, pos);
4001}
4002
4003int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4004{
aliguori178e08a2009-04-05 19:10:55 +00004005 BlockDriver *drv = bs->drv;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004006
4007 if (!drv) {
aliguori178e08a2009-04-05 19:10:55 +00004008 return -ENOMEDIUM;
Kevin Wolfcf8074b2013-04-05 21:27:53 +02004009 } else if (drv->bdrv_save_vmstate) {
4010 return drv->bdrv_save_vmstate(bs, qiov, pos);
4011 } else if (bs->file) {
4012 return bdrv_writev_vmstate(bs->file, qiov, pos);
4013 }
4014
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004015 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004016}
4017
Christoph Hellwig45566e92009-07-10 23:11:57 +02004018int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4019 int64_t pos, int size)
aliguori178e08a2009-04-05 19:10:55 +00004020{
4021 BlockDriver *drv = bs->drv;
4022 if (!drv)
4023 return -ENOMEDIUM;
MORITA Kazutaka7cdb1f62010-05-28 11:44:58 +09004024 if (drv->bdrv_load_vmstate)
4025 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4026 if (bs->file)
4027 return bdrv_load_vmstate(bs->file, buf, pos, size);
4028 return -ENOTSUP;
aliguori178e08a2009-04-05 19:10:55 +00004029}
4030
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004031void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4032{
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004033 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004034 return;
4035 }
4036
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004037 bs->drv->bdrv_debug_event(bs, event);
Kevin Wolf41c695c2012-12-06 14:32:58 +01004038}
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004039
Kevin Wolf41c695c2012-12-06 14:32:58 +01004040int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4041 const char *tag)
4042{
4043 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4044 bs = bs->file;
4045 }
4046
4047 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4048 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4049 }
4050
4051 return -ENOTSUP;
4052}
4053
Fam Zheng4cc70e92013-11-20 10:01:54 +08004054int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4055{
4056 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4057 bs = bs->file;
4058 }
4059
4060 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4061 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4062 }
4063
4064 return -ENOTSUP;
4065}
4066
Kevin Wolf41c695c2012-12-06 14:32:58 +01004067int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4068{
Max Reitz938789e2014-03-10 23:44:08 +01004069 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
Kevin Wolf41c695c2012-12-06 14:32:58 +01004070 bs = bs->file;
4071 }
4072
4073 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4074 return bs->drv->bdrv_debug_resume(bs, tag);
4075 }
4076
4077 return -ENOTSUP;
4078}
4079
4080bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4081{
4082 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4083 bs = bs->file;
4084 }
4085
4086 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4087 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4088 }
4089
4090 return false;
Kevin Wolf8b9b0cc2010-03-15 17:27:00 +01004091}
4092
Blue Swirl199630b2010-07-25 20:49:34 +00004093int bdrv_is_snapshot(BlockDriverState *bs)
4094{
4095 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4096}
4097
Jeff Codyb1b1d782012-10-16 15:49:09 -04004098/* backing_file can either be relative, or absolute, or a protocol. If it is
4099 * relative, it must be relative to the chain. So, passing in bs->filename
4100 * from a BDS as backing_file should not be done, as that may be relative to
4101 * the CWD rather than the chain. */
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004102BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4103 const char *backing_file)
4104{
Jeff Codyb1b1d782012-10-16 15:49:09 -04004105 char *filename_full = NULL;
4106 char *backing_file_full = NULL;
4107 char *filename_tmp = NULL;
4108 int is_protocol = 0;
4109 BlockDriverState *curr_bs = NULL;
4110 BlockDriverState *retval = NULL;
4111
4112 if (!bs || !bs->drv || !backing_file) {
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004113 return NULL;
4114 }
4115
Jeff Codyb1b1d782012-10-16 15:49:09 -04004116 filename_full = g_malloc(PATH_MAX);
4117 backing_file_full = g_malloc(PATH_MAX);
4118 filename_tmp = g_malloc(PATH_MAX);
4119
4120 is_protocol = path_has_protocol(backing_file);
4121
4122 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4123
4124 /* If either of the filename paths is actually a protocol, then
4125 * compare unmodified paths; otherwise make paths relative */
4126 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4127 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4128 retval = curr_bs->backing_hd;
4129 break;
4130 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004131 } else {
Jeff Codyb1b1d782012-10-16 15:49:09 -04004132 /* If not an absolute filename path, make it relative to the current
4133 * image's filename path */
4134 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4135 backing_file);
4136
4137 /* We are going to compare absolute pathnames */
4138 if (!realpath(filename_tmp, filename_full)) {
4139 continue;
4140 }
4141
4142 /* We need to make sure the backing filename we are comparing against
4143 * is relative to the current image filename (or absolute) */
4144 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4145 curr_bs->backing_file);
4146
4147 if (!realpath(filename_tmp, backing_file_full)) {
4148 continue;
4149 }
4150
4151 if (strcmp(backing_file_full, filename_full) == 0) {
4152 retval = curr_bs->backing_hd;
4153 break;
4154 }
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004155 }
4156 }
4157
Jeff Codyb1b1d782012-10-16 15:49:09 -04004158 g_free(filename_full);
4159 g_free(backing_file_full);
4160 g_free(filename_tmp);
4161 return retval;
Marcelo Tosattie8a6bb92012-01-18 14:40:51 +00004162}
4163
Benoît Canetf198fd12012-08-02 10:22:47 +02004164int bdrv_get_backing_file_depth(BlockDriverState *bs)
4165{
4166 if (!bs->drv) {
4167 return 0;
4168 }
4169
4170 if (!bs->backing_hd) {
4171 return 0;
4172 }
4173
4174 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4175}
4176
Jeff Cody79fac562012-09-27 13:29:15 -04004177BlockDriverState *bdrv_find_base(BlockDriverState *bs)
4178{
4179 BlockDriverState *curr_bs = NULL;
4180
4181 if (!bs) {
4182 return NULL;
4183 }
4184
4185 curr_bs = bs;
4186
4187 while (curr_bs->backing_hd) {
4188 curr_bs = curr_bs->backing_hd;
4189 }
4190 return curr_bs;
4191}
4192
bellard83f64092006-08-01 16:21:11 +00004193/**************************************************************/
4194/* async I/Os */
4195
aliguori3b69e4b2009-01-22 16:59:24 +00004196BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
aliguorif141eaf2009-04-07 18:43:24 +00004197 QEMUIOVector *qiov, int nb_sectors,
aliguori3b69e4b2009-01-22 16:59:24 +00004198 BlockDriverCompletionFunc *cb, void *opaque)
4199{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004200 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4201
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004202 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004203 cb, opaque, false);
bellard83f64092006-08-01 16:21:11 +00004204}
4205
aliguorif141eaf2009-04-07 18:43:24 +00004206BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4207 QEMUIOVector *qiov, int nb_sectors,
4208 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004209{
Stefan Hajnoczibbf0a442010-10-05 14:28:53 +01004210 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4211
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004212 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004213 cb, opaque, true);
bellard83f64092006-08-01 16:21:11 +00004214}
4215
Paolo Bonzinid5ef94d2013-11-22 13:39:46 +01004216BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4217 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4218 BlockDriverCompletionFunc *cb, void *opaque)
4219{
4220 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4221
4222 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4223 BDRV_REQ_ZERO_WRITE | flags,
4224 cb, opaque, true);
4225}
4226
Kevin Wolf40b4f532009-09-09 17:53:37 +02004227
/*
 * Shared completion state for one bdrv_aio_multiwrite() call.
 */
typedef struct MultiwriteCB {
    /* First negative result reported by a submitted request, else 0 */
    int error;
    /* Submitted (post-merge) requests that have not completed yet */
    int num_requests;
    /* Number of entries in callbacks[], one per original request */
    int num_callbacks;
    struct {
        /* Completion callback of the original request and its argument */
        BlockDriverCompletionFunc *cb;
        void *opaque;
        /* Merged vector created by multiwrite_merge(), destroyed and freed
         * after the callbacks have run; NULL if none */
        QEMUIOVector *free_qiov;
    } callbacks[];
} MultiwriteCB;
4238
4239static void multiwrite_user_cb(MultiwriteCB *mcb)
4240{
4241 int i;
4242
4243 for (i = 0; i < mcb->num_callbacks; i++) {
4244 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
Stefan Hajnoczi1e1ea482010-04-21 20:35:45 +01004245 if (mcb->callbacks[i].free_qiov) {
4246 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4247 }
Anthony Liguori7267c092011-08-20 22:09:37 -05004248 g_free(mcb->callbacks[i].free_qiov);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004249 }
4250}
4251
4252static void multiwrite_cb(void *opaque, int ret)
4253{
4254 MultiwriteCB *mcb = opaque;
4255
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004256 trace_multiwrite_cb(mcb, ret);
4257
Kevin Wolfcb6d3ca2010-04-01 22:48:44 +02004258 if (ret < 0 && !mcb->error) {
Kevin Wolf40b4f532009-09-09 17:53:37 +02004259 mcb->error = ret;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004260 }
4261
4262 mcb->num_requests--;
4263 if (mcb->num_requests == 0) {
Kevin Wolfde189a12010-07-01 16:08:51 +02004264 multiwrite_user_cb(mcb);
Anthony Liguori7267c092011-08-20 22:09:37 -05004265 g_free(mcb);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004266 }
4267}
4268
4269static int multiwrite_req_compare(const void *a, const void *b)
4270{
Christoph Hellwig77be4362010-05-19 20:53:10 +02004271 const BlockRequest *req1 = a, *req2 = b;
4272
4273 /*
4274 * Note that we can't simply subtract req2->sector from req1->sector
4275 * here as that could overflow the return value.
4276 */
4277 if (req1->sector > req2->sector) {
4278 return 1;
4279 } else if (req1->sector < req2->sector) {
4280 return -1;
4281 } else {
4282 return 0;
4283 }
Kevin Wolf40b4f532009-09-09 17:53:37 +02004284}
4285
4286/*
4287 * Takes a bunch of requests and tries to merge them. Returns the number of
4288 * requests that remain after merging.
4289 */
4290static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4291 int num_reqs, MultiwriteCB *mcb)
4292{
4293 int i, outidx;
4294
4295 // Sort requests by start sector
4296 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4297
4298 // Check if adjacent requests touch the same clusters. If so, combine them,
4299 // filling up gaps with zero sectors.
4300 outidx = 0;
4301 for (i = 1; i < num_reqs; i++) {
4302 int merge = 0;
4303 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4304
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004305 // Handle exactly sequential writes and overlapping writes.
Kevin Wolf40b4f532009-09-09 17:53:37 +02004306 if (reqs[i].sector <= oldreq_last) {
4307 merge = 1;
4308 }
4309
Christoph Hellwige2a305f2010-01-26 14:49:08 +01004310 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4311 merge = 0;
4312 }
4313
Kevin Wolf40b4f532009-09-09 17:53:37 +02004314 if (merge) {
4315 size_t size;
Anthony Liguori7267c092011-08-20 22:09:37 -05004316 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004317 qemu_iovec_init(qiov,
4318 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4319
4320 // Add the first request to the merged one. If the requests are
4321 // overlapping, drop the last sectors of the first request.
4322 size = (reqs[i].sector - reqs[outidx].sector) << 9;
Michael Tokarev1b093c42012-03-12 21:28:06 +04004323 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004324
Paolo Bonzinib6a127a2012-02-21 16:43:52 +01004325 // We should need to add any zeros between the two requests
4326 assert (reqs[i].sector <= oldreq_last);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004327
4328 // Add the second request
Michael Tokarev1b093c42012-03-12 21:28:06 +04004329 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004330
Kevin Wolfcbf1dff2010-05-21 11:09:42 +02004331 reqs[outidx].nb_sectors = qiov->size >> 9;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004332 reqs[outidx].qiov = qiov;
4333
4334 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4335 } else {
4336 outidx++;
4337 reqs[outidx].sector = reqs[i].sector;
4338 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4339 reqs[outidx].qiov = reqs[i].qiov;
4340 }
4341 }
4342
4343 return outidx + 1;
4344}
4345
4346/*
4347 * Submit multiple AIO write requests at once.
4348 *
4349 * On success, the function returns 0 and all requests in the reqs array have
4350 * been submitted. In error case this function returns -1, and any of the
4351 * requests may or may not be submitted yet. In particular, this means that the
4352 * callback will be called for some of the requests, for others it won't. The
4353 * caller must check the error field of the BlockRequest to wait for the right
4354 * callbacks (if error != 0, no callback will be called).
4355 *
4356 * The implementation may modify the contents of the reqs array, e.g. to merge
4357 * requests. However, the fields opaque and error are left unmodified as they
4358 * are used to signal failure for a single request to the caller.
4359 */
4360int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4361{
Kevin Wolf40b4f532009-09-09 17:53:37 +02004362 MultiwriteCB *mcb;
4363 int i;
4364
Ryan Harper301db7c2011-03-07 10:01:04 -06004365 /* don't submit writes if we don't have a medium */
4366 if (bs->drv == NULL) {
4367 for (i = 0; i < num_reqs; i++) {
4368 reqs[i].error = -ENOMEDIUM;
4369 }
4370 return -1;
4371 }
4372
Kevin Wolf40b4f532009-09-09 17:53:37 +02004373 if (num_reqs == 0) {
4374 return 0;
4375 }
4376
4377 // Create MultiwriteCB structure
Anthony Liguori7267c092011-08-20 22:09:37 -05004378 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
Kevin Wolf40b4f532009-09-09 17:53:37 +02004379 mcb->num_requests = 0;
4380 mcb->num_callbacks = num_reqs;
4381
4382 for (i = 0; i < num_reqs; i++) {
4383 mcb->callbacks[i].cb = reqs[i].cb;
4384 mcb->callbacks[i].opaque = reqs[i].opaque;
4385 }
4386
4387 // Check for mergable requests
4388 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4389
Stefan Hajnoczi6d519a52010-05-22 18:15:08 +01004390 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4391
Paolo Bonzinidf9309f2011-11-14 17:50:50 +01004392 /* Run the aio requests. */
4393 mcb->num_requests = num_reqs;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004394 for (i = 0; i < num_reqs; i++) {
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004395 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4396 reqs[i].nb_sectors, reqs[i].flags,
4397 multiwrite_cb, mcb,
4398 true);
Kevin Wolf40b4f532009-09-09 17:53:37 +02004399 }
4400
4401 return 0;
Kevin Wolf40b4f532009-09-09 17:53:37 +02004402}
4403
bellard83f64092006-08-01 16:21:11 +00004404void bdrv_aio_cancel(BlockDriverAIOCB *acb)
pbrookce1a14d2006-08-07 02:38:06 +00004405{
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004406 acb->aiocb_info->cancel(acb);
bellard83f64092006-08-01 16:21:11 +00004407}
4408
4409/**************************************************************/
4410/* async block device emulation */
4411
/*
 * AIOCB used by bdrv_aio_rw_vector(), which performs the I/O synchronously
 * through a bounce buffer and reports completion from a bottom half.
 */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    /* Bottom half running bdrv_aio_bh_cb() to deliver the completion */
    QEMUBH *bh;
    /* Result of the driver's bdrv_read/bdrv_write call */
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;
    int is_write;
} BlockDriverAIOCBSync;
4421
4422static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
4423{
Kevin Wolfb666d232010-05-05 11:44:39 +02004424 BlockDriverAIOCBSync *acb =
4425 container_of(blockacb, BlockDriverAIOCBSync, common);
Dor Laor6a7ad292009-06-01 12:07:23 +03004426 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004427 acb->bh = NULL;
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004428 qemu_aio_release(acb);
4429}
4430
/* AIOCB description for requests created by bdrv_aio_rw_vector() */
static const AIOCBInfo bdrv_em_aiocb_info = {
    .aiocb_size = sizeof(BlockDriverAIOCBSync),
    .cancel = bdrv_aio_cancel_em,
};
4435
bellard83f64092006-08-01 16:21:11 +00004436static void bdrv_aio_bh_cb(void *opaque)
bellardbeac80c2006-06-26 20:08:57 +00004437{
pbrookce1a14d2006-08-07 02:38:06 +00004438 BlockDriverAIOCBSync *acb = opaque;
aliguorif141eaf2009-04-07 18:43:24 +00004439
aliguorif141eaf2009-04-07 18:43:24 +00004440 if (!acb->is_write)
Michael Tokarev03396142012-06-07 20:17:55 +04004441 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
aliguoriceb42de2009-04-07 18:43:28 +00004442 qemu_vfree(acb->bounce);
pbrookce1a14d2006-08-07 02:38:06 +00004443 acb->common.cb(acb->common.opaque, acb->ret);
Dor Laor6a7ad292009-06-01 12:07:23 +03004444 qemu_bh_delete(acb->bh);
Avi Kivity36afc452009-06-23 16:20:36 +03004445 acb->bh = NULL;
pbrookce1a14d2006-08-07 02:38:06 +00004446 qemu_aio_release(acb);
bellardbeac80c2006-06-26 20:08:57 +00004447}
bellardbeac80c2006-06-26 20:08:57 +00004448
aliguorif141eaf2009-04-07 18:43:24 +00004449static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4450 int64_t sector_num,
4451 QEMUIOVector *qiov,
4452 int nb_sectors,
4453 BlockDriverCompletionFunc *cb,
4454 void *opaque,
4455 int is_write)
4456
bellardea2384d2004-08-01 21:59:26 +00004457{
pbrookce1a14d2006-08-07 02:38:06 +00004458 BlockDriverAIOCBSync *acb;
pbrookce1a14d2006-08-07 02:38:06 +00004459
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004460 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
aliguorif141eaf2009-04-07 18:43:24 +00004461 acb->is_write = is_write;
4462 acb->qiov = qiov;
aliguorie268ca52009-04-22 20:20:00 +00004463 acb->bounce = qemu_blockalign(bs, qiov->size);
Paolo Bonzini3f3aace2011-11-14 17:50:54 +01004464 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
aliguorif141eaf2009-04-07 18:43:24 +00004465
4466 if (is_write) {
Michael Tokarevd5e6b162012-06-07 20:21:06 +04004467 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004468 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004469 } else {
Stefan Hajnoczi1ed20ac2011-10-13 13:08:21 +01004470 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
aliguorif141eaf2009-04-07 18:43:24 +00004471 }
4472
pbrookce1a14d2006-08-07 02:38:06 +00004473 qemu_bh_schedule(acb->bh);
aliguorif141eaf2009-04-07 18:43:24 +00004474
pbrookce1a14d2006-08-07 02:38:06 +00004475 return &acb->common;
pbrook7a6cba62006-06-04 11:39:07 +00004476}
4477
aliguorif141eaf2009-04-07 18:43:24 +00004478static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4479 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
pbrookce1a14d2006-08-07 02:38:06 +00004480 BlockDriverCompletionFunc *cb, void *opaque)
bellard83f64092006-08-01 16:21:11 +00004481{
aliguorif141eaf2009-04-07 18:43:24 +00004482 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
bellard83f64092006-08-01 16:21:11 +00004483}
4484
aliguorif141eaf2009-04-07 18:43:24 +00004485static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4486 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4487 BlockDriverCompletionFunc *cb, void *opaque)
4488{
4489 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4490}
4491
Kevin Wolf68485422011-06-30 10:05:46 +02004492
/*
 * AIOCB for requests that are executed in a coroutine and completed from a
 * bottom half (see bdrv_co_em_bh()).
 */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    /* Request parameters; req.error receives the result */
    BlockRequest req;
    bool is_write;
    /* NULL, or a flag that bdrv_co_em_bh() sets to true on completion;
     * installed by bdrv_aio_co_cancel_em() while it waits */
    bool *done;
    /* Bottom half that delivers the completion */
    QEMUBH* bh;
} BlockDriverAIOCBCoroutine;
4500
4501static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
4502{
Kevin Wolfd318aea2012-11-13 16:35:08 +01004503 BlockDriverAIOCBCoroutine *acb =
4504 container_of(blockacb, BlockDriverAIOCBCoroutine, common);
4505 bool done = false;
4506
4507 acb->done = &done;
4508 while (!done) {
4509 qemu_aio_wait();
4510 }
Kevin Wolf68485422011-06-30 10:05:46 +02004511}
4512
/* AIOCB description for coroutine-based requests (BlockDriverAIOCBCoroutine) */
static const AIOCBInfo bdrv_em_co_aiocb_info = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel = bdrv_aio_co_cancel_em,
};
4517
Paolo Bonzini35246a62011-10-14 10:41:29 +02004518static void bdrv_co_em_bh(void *opaque)
Kevin Wolf68485422011-06-30 10:05:46 +02004519{
4520 BlockDriverAIOCBCoroutine *acb = opaque;
4521
4522 acb->common.cb(acb->common.opaque, acb->req.error);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004523
4524 if (acb->done) {
4525 *acb->done = true;
4526 }
4527
Kevin Wolf68485422011-06-30 10:05:46 +02004528 qemu_bh_delete(acb->bh);
4529 qemu_aio_release(acb);
4530}
4531
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004532/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4533static void coroutine_fn bdrv_co_do_rw(void *opaque)
4534{
4535 BlockDriverAIOCBCoroutine *acb = opaque;
4536 BlockDriverState *bs = acb->common.bs;
4537
4538 if (!acb->is_write) {
4539 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004540 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004541 } else {
4542 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004543 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004544 }
4545
Paolo Bonzini35246a62011-10-14 10:41:29 +02004546 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Stefan Hajnoczib2a61372011-10-13 13:08:23 +01004547 qemu_bh_schedule(acb->bh);
4548}
4549
Kevin Wolf68485422011-06-30 10:05:46 +02004550static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4551 int64_t sector_num,
4552 QEMUIOVector *qiov,
4553 int nb_sectors,
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004554 BdrvRequestFlags flags,
Kevin Wolf68485422011-06-30 10:05:46 +02004555 BlockDriverCompletionFunc *cb,
4556 void *opaque,
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004557 bool is_write)
Kevin Wolf68485422011-06-30 10:05:46 +02004558{
4559 Coroutine *co;
4560 BlockDriverAIOCBCoroutine *acb;
4561
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004562 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolf68485422011-06-30 10:05:46 +02004563 acb->req.sector = sector_num;
4564 acb->req.nb_sectors = nb_sectors;
4565 acb->req.qiov = qiov;
Paolo Bonzinid20d9b72013-11-22 13:39:44 +01004566 acb->req.flags = flags;
Kevin Wolf68485422011-06-30 10:05:46 +02004567 acb->is_write = is_write;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004568 acb->done = NULL;
Kevin Wolf68485422011-06-30 10:05:46 +02004569
Stefan Hajnoczi8c5873d2011-10-13 21:09:28 +01004570 co = qemu_coroutine_create(bdrv_co_do_rw);
Kevin Wolf68485422011-06-30 10:05:46 +02004571 qemu_coroutine_enter(co, acb);
4572
4573 return &acb->common;
4574}
4575
Paolo Bonzini07f07612011-10-17 12:32:12 +02004576static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004577{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004578 BlockDriverAIOCBCoroutine *acb = opaque;
4579 BlockDriverState *bs = acb->common.bs;
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004580
Paolo Bonzini07f07612011-10-17 12:32:12 +02004581 acb->req.error = bdrv_co_flush(bs);
4582 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004583 qemu_bh_schedule(acb->bh);
Christoph Hellwigb2e12bc2009-09-04 19:01:49 +02004584}
4585
Paolo Bonzini07f07612011-10-17 12:32:12 +02004586BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
Alexander Graf016f5cf2010-05-26 17:51:49 +02004587 BlockDriverCompletionFunc *cb, void *opaque)
4588{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004589 trace_bdrv_aio_flush(bs, opaque);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004590
Paolo Bonzini07f07612011-10-17 12:32:12 +02004591 Coroutine *co;
4592 BlockDriverAIOCBCoroutine *acb;
Alexander Graf016f5cf2010-05-26 17:51:49 +02004593
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004594 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Kevin Wolfd318aea2012-11-13 16:35:08 +01004595 acb->done = NULL;
4596
Paolo Bonzini07f07612011-10-17 12:32:12 +02004597 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4598 qemu_coroutine_enter(co, acb);
Alexander Graf016f5cf2010-05-26 17:51:49 +02004599
Alexander Graf016f5cf2010-05-26 17:51:49 +02004600 return &acb->common;
4601}
4602
Paolo Bonzini4265d622011-10-17 12:32:14 +02004603static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4604{
4605 BlockDriverAIOCBCoroutine *acb = opaque;
4606 BlockDriverState *bs = acb->common.bs;
4607
4608 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
4609 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
4610 qemu_bh_schedule(acb->bh);
4611}
4612
4613BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4614 int64_t sector_num, int nb_sectors,
4615 BlockDriverCompletionFunc *cb, void *opaque)
4616{
4617 Coroutine *co;
4618 BlockDriverAIOCBCoroutine *acb;
4619
4620 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4621
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004622 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
Paolo Bonzini4265d622011-10-17 12:32:14 +02004623 acb->req.sector = sector_num;
4624 acb->req.nb_sectors = nb_sectors;
Kevin Wolfd318aea2012-11-13 16:35:08 +01004625 acb->done = NULL;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004626 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4627 qemu_coroutine_enter(co, acb);
4628
4629 return &acb->common;
4630}
4631
bellardea2384d2004-08-01 21:59:26 +00004632void bdrv_init(void)
4633{
Anthony Liguori5efa9d52009-05-09 17:03:42 -05004634 module_call_init(MODULE_INIT_BLOCK);
bellardea2384d2004-08-01 21:59:26 +00004635}
pbrookce1a14d2006-08-07 02:38:06 +00004636
Markus Armbrustereb852012009-10-27 18:41:44 +01004637void bdrv_init_with_whitelist(void)
4638{
4639 use_bdrv_whitelist = 1;
4640 bdrv_init();
4641}
4642
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004643void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
Christoph Hellwigc16b5a22009-05-25 12:37:32 +02004644 BlockDriverCompletionFunc *cb, void *opaque)
aliguori6bbff9a2009-03-20 18:25:59 +00004645{
pbrookce1a14d2006-08-07 02:38:06 +00004646 BlockDriverAIOCB *acb;
4647
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004648 acb = g_slice_alloc(aiocb_info->aiocb_size);
4649 acb->aiocb_info = aiocb_info;
pbrookce1a14d2006-08-07 02:38:06 +00004650 acb->bs = bs;
4651 acb->cb = cb;
4652 acb->opaque = opaque;
4653 return acb;
4654}
4655
4656void qemu_aio_release(void *p)
4657{
Stefan Hajnoczid37c9752012-10-31 16:34:36 +01004658 BlockDriverAIOCB *acb = p;
Stefan Hajnoczid7331be2012-10-31 16:34:37 +01004659 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
pbrookce1a14d2006-08-07 02:38:06 +00004660}
bellard19cb3732006-08-19 11:45:59 +00004661
/**************************************************************/
/* Coroutine block device emulation */
4664
4665typedef struct CoroutineIOCompletion {
4666 Coroutine *coroutine;
4667 int ret;
4668} CoroutineIOCompletion;
4669
4670static void bdrv_co_io_em_complete(void *opaque, int ret)
4671{
4672 CoroutineIOCompletion *co = opaque;
4673
4674 co->ret = ret;
4675 qemu_coroutine_enter(co->coroutine, NULL);
4676}
4677
4678static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4679 int nb_sectors, QEMUIOVector *iov,
4680 bool is_write)
4681{
4682 CoroutineIOCompletion co = {
4683 .coroutine = qemu_coroutine_self(),
4684 };
4685 BlockDriverAIOCB *acb;
4686
4687 if (is_write) {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004688 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4689 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004690 } else {
Stefan Hajnoczia652d162011-10-05 17:17:02 +01004691 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4692 bdrv_co_io_em_complete, &co);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004693 }
4694
Stefan Hajnoczi59370aa2011-09-30 17:34:58 +01004695 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
Kevin Wolff9f05dc2011-07-15 13:50:26 +02004696 if (!acb) {
4697 return -EIO;
4698 }
4699 qemu_coroutine_yield();
4700
4701 return co.ret;
4702}
4703
4704static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4705 int64_t sector_num, int nb_sectors,
4706 QEMUIOVector *iov)
4707{
4708 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4709}
4710
4711static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4712 int64_t sector_num, int nb_sectors,
4713 QEMUIOVector *iov)
4714{
4715 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4716}
4717
Paolo Bonzini07f07612011-10-17 12:32:12 +02004718static void coroutine_fn bdrv_flush_co_entry(void *opaque)
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004719{
Paolo Bonzini07f07612011-10-17 12:32:12 +02004720 RwCo *rwco = opaque;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004721
Paolo Bonzini07f07612011-10-17 12:32:12 +02004722 rwco->ret = bdrv_co_flush(rwco->bs);
4723}
4724
4725int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
4726{
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004727 int ret;
4728
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004729 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
Paolo Bonzini07f07612011-10-17 12:32:12 +02004730 return 0;
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004731 }
4732
Kevin Wolfca716362011-11-10 18:13:59 +01004733 /* Write back cached data to the OS even with cache=unsafe */
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004734 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004735 if (bs->drv->bdrv_co_flush_to_os) {
4736 ret = bs->drv->bdrv_co_flush_to_os(bs);
4737 if (ret < 0) {
4738 return ret;
4739 }
4740 }
4741
Kevin Wolfca716362011-11-10 18:13:59 +01004742 /* But don't actually force it to the disk with cache=unsafe */
4743 if (bs->open_flags & BDRV_O_NO_FLUSH) {
Kevin Wolfd4c82322012-08-15 12:52:45 +02004744 goto flush_parent;
Kevin Wolfca716362011-11-10 18:13:59 +01004745 }
4746
Kevin Wolfbf736fe2013-06-05 15:17:55 +02004747 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
Kevin Wolfeb489bb2011-11-10 18:10:11 +01004748 if (bs->drv->bdrv_co_flush_to_disk) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004749 ret = bs->drv->bdrv_co_flush_to_disk(bs);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004750 } else if (bs->drv->bdrv_aio_flush) {
4751 BlockDriverAIOCB *acb;
4752 CoroutineIOCompletion co = {
4753 .coroutine = qemu_coroutine_self(),
4754 };
4755
4756 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
4757 if (acb == NULL) {
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004758 ret = -EIO;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004759 } else {
4760 qemu_coroutine_yield();
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004761 ret = co.ret;
Paolo Bonzini07f07612011-10-17 12:32:12 +02004762 }
Paolo Bonzini07f07612011-10-17 12:32:12 +02004763 } else {
4764 /*
4765 * Some block drivers always operate in either writethrough or unsafe
4766 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
4767 * know how the server works (because the behaviour is hardcoded or
4768 * depends on server-side configuration), so we can't ensure that
4769 * everything is safe on disk. Returning an error doesn't work because
4770 * that would break guests even if the server operates in writethrough
4771 * mode.
4772 *
4773 * Let's hope the user knows what he's doing.
4774 */
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004775 ret = 0;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004776 }
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004777 if (ret < 0) {
4778 return ret;
4779 }
4780
4781 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
4782 * in the case of cache=unsafe, so there are no useless flushes.
4783 */
Kevin Wolfd4c82322012-08-15 12:52:45 +02004784flush_parent:
Paolo Bonzini29cdb252012-03-12 18:26:01 +01004785 return bdrv_co_flush(bs->file);
Paolo Bonzini07f07612011-10-17 12:32:12 +02004786}
4787
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004788void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06004789{
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004790 Error *local_err = NULL;
4791 int ret;
4792
Kevin Wolf3456a8d2014-03-11 10:58:39 +01004793 if (!bs->drv) {
4794 return;
Anthony Liguori0f154232011-11-14 15:09:45 -06004795 }
Kevin Wolf3456a8d2014-03-11 10:58:39 +01004796
4797 if (bs->drv->bdrv_invalidate_cache) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004798 bs->drv->bdrv_invalidate_cache(bs, &local_err);
Kevin Wolf3456a8d2014-03-11 10:58:39 +01004799 } else if (bs->file) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004800 bdrv_invalidate_cache(bs->file, &local_err);
4801 }
4802 if (local_err) {
4803 error_propagate(errp, local_err);
4804 return;
Kevin Wolf3456a8d2014-03-11 10:58:39 +01004805 }
4806
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004807 ret = refresh_total_sectors(bs, bs->total_sectors);
4808 if (ret < 0) {
4809 error_setg_errno(errp, -ret, "Could not refresh total sector count");
4810 return;
4811 }
Anthony Liguori0f154232011-11-14 15:09:45 -06004812}
4813
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004814void bdrv_invalidate_cache_all(Error **errp)
Anthony Liguori0f154232011-11-14 15:09:45 -06004815{
4816 BlockDriverState *bs;
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004817 Error *local_err = NULL;
Anthony Liguori0f154232011-11-14 15:09:45 -06004818
Benoît Canetdc364f42014-01-23 21:31:32 +01004819 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Kevin Wolf5a8a30d2014-03-12 15:59:16 +01004820 bdrv_invalidate_cache(bs, &local_err);
4821 if (local_err) {
4822 error_propagate(errp, local_err);
4823 return;
4824 }
Anthony Liguori0f154232011-11-14 15:09:45 -06004825 }
4826}
4827
Benoît Canet07789262012-03-23 08:36:49 +01004828void bdrv_clear_incoming_migration_all(void)
4829{
4830 BlockDriverState *bs;
4831
Benoît Canetdc364f42014-01-23 21:31:32 +01004832 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
Benoît Canet07789262012-03-23 08:36:49 +01004833 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
4834 }
4835}
4836
Paolo Bonzini07f07612011-10-17 12:32:12 +02004837int bdrv_flush(BlockDriverState *bs)
4838{
4839 Coroutine *co;
4840 RwCo rwco = {
4841 .bs = bs,
4842 .ret = NOT_DONE,
4843 };
4844
4845 if (qemu_in_coroutine()) {
4846 /* Fast-path if already in coroutine context */
4847 bdrv_flush_co_entry(&rwco);
4848 } else {
4849 co = qemu_coroutine_create(bdrv_flush_co_entry);
4850 qemu_coroutine_enter(co, &rwco);
4851 while (rwco.ret == NOT_DONE) {
4852 qemu_aio_wait();
4853 }
4854 }
4855
4856 return rwco.ret;
Kevin Wolfe7a8a782011-07-15 16:05:00 +02004857}
4858
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004859typedef struct DiscardCo {
4860 BlockDriverState *bs;
4861 int64_t sector_num;
4862 int nb_sectors;
4863 int ret;
4864} DiscardCo;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004865static void coroutine_fn bdrv_discard_co_entry(void *opaque)
4866{
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004867 DiscardCo *rwco = opaque;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004868
4869 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
4870}
4871
/*
 * Default per-request discard limit, used when the BlockLimits do not
 * specify one: 32768 512-byte sectors (16 MiB).
 */
#define MAX_DISCARD_DEFAULT 32768
4876
Paolo Bonzini4265d622011-10-17 12:32:14 +02004877int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
4878 int nb_sectors)
4879{
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004880 int max_discard;
4881
Paolo Bonzini4265d622011-10-17 12:32:14 +02004882 if (!bs->drv) {
4883 return -ENOMEDIUM;
4884 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
4885 return -EIO;
4886 } else if (bs->read_only) {
4887 return -EROFS;
Paolo Bonzinidf702c92013-01-14 16:26:58 +01004888 }
4889
Fam Zhenge4654d22013-11-13 18:29:43 +08004890 bdrv_reset_dirty(bs, sector_num, nb_sectors);
Paolo Bonzinidf702c92013-01-14 16:26:58 +01004891
Paolo Bonzini9e8f1832013-02-08 14:06:11 +01004892 /* Do nothing if disabled. */
4893 if (!(bs->open_flags & BDRV_O_UNMAP)) {
4894 return 0;
4895 }
4896
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004897 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
Paolo Bonzini4265d622011-10-17 12:32:14 +02004898 return 0;
4899 }
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004900
4901 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
4902 while (nb_sectors > 0) {
4903 int ret;
4904 int num = nb_sectors;
4905
4906 /* align request */
4907 if (bs->bl.discard_alignment &&
4908 num >= bs->bl.discard_alignment &&
4909 sector_num % bs->bl.discard_alignment) {
4910 if (num > bs->bl.discard_alignment) {
4911 num = bs->bl.discard_alignment;
4912 }
4913 num -= sector_num % bs->bl.discard_alignment;
4914 }
4915
4916 /* limit request size */
4917 if (num > max_discard) {
4918 num = max_discard;
4919 }
4920
4921 if (bs->drv->bdrv_co_discard) {
4922 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
4923 } else {
4924 BlockDriverAIOCB *acb;
4925 CoroutineIOCompletion co = {
4926 .coroutine = qemu_coroutine_self(),
4927 };
4928
4929 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
4930 bdrv_co_io_em_complete, &co);
4931 if (acb == NULL) {
4932 return -EIO;
4933 } else {
4934 qemu_coroutine_yield();
4935 ret = co.ret;
4936 }
4937 }
Paolo Bonzini7ce21012013-11-22 13:39:47 +01004938 if (ret && ret != -ENOTSUP) {
Paolo Bonzinid51e9fe2013-11-22 13:39:43 +01004939 return ret;
4940 }
4941
4942 sector_num += num;
4943 nb_sectors -= num;
4944 }
4945 return 0;
Paolo Bonzini4265d622011-10-17 12:32:14 +02004946}
4947
4948int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
4949{
4950 Coroutine *co;
Kevin Wolf775aa8b2013-12-05 12:09:38 +01004951 DiscardCo rwco = {
Paolo Bonzini4265d622011-10-17 12:32:14 +02004952 .bs = bs,
4953 .sector_num = sector_num,
4954 .nb_sectors = nb_sectors,
4955 .ret = NOT_DONE,
4956 };
4957
4958 if (qemu_in_coroutine()) {
4959 /* Fast-path if already in coroutine context */
4960 bdrv_discard_co_entry(&rwco);
4961 } else {
4962 co = qemu_coroutine_create(bdrv_discard_co_entry);
4963 qemu_coroutine_enter(co, &rwco);
4964 while (rwco.ret == NOT_DONE) {
4965 qemu_aio_wait();
4966 }
4967 }
4968
4969 return rwco.ret;
4970}
4971
/**************************************************************/
/* removable device support */
4974
4975/**
4976 * Return TRUE if the media is present
4977 */
4978int bdrv_is_inserted(BlockDriverState *bs)
4979{
4980 BlockDriver *drv = bs->drv;
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004981
bellard19cb3732006-08-19 11:45:59 +00004982 if (!drv)
4983 return 0;
4984 if (!drv->bdrv_is_inserted)
Markus Armbrustera1aff5b2011-09-06 18:58:41 +02004985 return 1;
4986 return drv->bdrv_is_inserted(bs);
bellard19cb3732006-08-19 11:45:59 +00004987}
4988
4989/**
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004990 * Return whether the media changed since the last call to this
4991 * function, or -ENOTSUP if we don't know. Most drivers don't know.
bellard19cb3732006-08-19 11:45:59 +00004992 */
4993int bdrv_media_changed(BlockDriverState *bs)
4994{
4995 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00004996
Markus Armbruster8e49ca42011-08-03 15:08:08 +02004997 if (drv && drv->bdrv_media_changed) {
4998 return drv->bdrv_media_changed(bs);
4999 }
5000 return -ENOTSUP;
bellard19cb3732006-08-19 11:45:59 +00005001}
5002
5003/**
5004 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5005 */
Luiz Capitulinof36f3942012-02-03 16:24:53 -02005006void bdrv_eject(BlockDriverState *bs, bool eject_flag)
bellard19cb3732006-08-19 11:45:59 +00005007{
5008 BlockDriver *drv = bs->drv;
bellard19cb3732006-08-19 11:45:59 +00005009
Markus Armbruster822e1cd2011-07-20 18:23:42 +02005010 if (drv && drv->bdrv_eject) {
5011 drv->bdrv_eject(bs, eject_flag);
bellard19cb3732006-08-19 11:45:59 +00005012 }
Luiz Capitulino6f382ed2012-02-14 13:41:13 -02005013
5014 if (bs->device_name[0] != '\0') {
5015 bdrv_emit_qmp_eject_event(bs, eject_flag);
5016 }
bellard19cb3732006-08-19 11:45:59 +00005017}
5018
bellard19cb3732006-08-19 11:45:59 +00005019/**
5020 * Lock or unlock the media (if it is locked, the user won't be able
5021 * to eject it manually).
5022 */
Markus Armbruster025e8492011-09-06 18:58:47 +02005023void bdrv_lock_medium(BlockDriverState *bs, bool locked)
bellard19cb3732006-08-19 11:45:59 +00005024{
5025 BlockDriver *drv = bs->drv;
5026
Markus Armbruster025e8492011-09-06 18:58:47 +02005027 trace_bdrv_lock_medium(bs, locked);
Stefan Hajnoczib8c6d092011-03-29 20:04:40 +01005028
Markus Armbruster025e8492011-09-06 18:58:47 +02005029 if (drv && drv->bdrv_lock_medium) {
5030 drv->bdrv_lock_medium(bs, locked);
bellard19cb3732006-08-19 11:45:59 +00005031 }
5032}
ths985a03b2007-12-24 16:10:43 +00005033
/* needed for generic scsi interface */
5035
5036int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5037{
5038 BlockDriver *drv = bs->drv;
5039
5040 if (drv && drv->bdrv_ioctl)
5041 return drv->bdrv_ioctl(bs, req, buf);
5042 return -ENOTSUP;
5043}
aliguori7d780662009-03-12 19:57:08 +00005044
aliguori221f7152009-03-28 17:28:41 +00005045BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5046 unsigned long int req, void *buf,
5047 BlockDriverCompletionFunc *cb, void *opaque)
aliguori7d780662009-03-12 19:57:08 +00005048{
aliguori221f7152009-03-28 17:28:41 +00005049 BlockDriver *drv = bs->drv;
aliguori7d780662009-03-12 19:57:08 +00005050
aliguori221f7152009-03-28 17:28:41 +00005051 if (drv && drv->bdrv_aio_ioctl)
5052 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5053 return NULL;
aliguori7d780662009-03-12 19:57:08 +00005054}
aliguorie268ca52009-04-22 20:20:00 +00005055
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005056void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005057{
Paolo Bonzini1b7fd722011-11-29 11:35:47 +01005058 bs->guest_block_size = align;
Markus Armbruster7b6f9302011-09-06 18:58:56 +02005059}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005060
aliguorie268ca52009-04-22 20:20:00 +00005061void *qemu_blockalign(BlockDriverState *bs, size_t size)
5062{
Kevin Wolf339064d2013-11-28 10:23:32 +01005063 return qemu_memalign(bdrv_opt_mem_align(bs), size);
aliguorie268ca52009-04-22 20:20:00 +00005064}
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005065
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005066/*
5067 * Check if all memory in this vector is sector aligned.
5068 */
5069bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5070{
5071 int i;
Kevin Wolf339064d2013-11-28 10:23:32 +01005072 size_t alignment = bdrv_opt_mem_align(bs);
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005073
5074 for (i = 0; i < qiov->niov; i++) {
Kevin Wolf339064d2013-11-28 10:23:32 +01005075 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005076 return false;
5077 }
Kevin Wolf339064d2013-11-28 10:23:32 +01005078 if (qiov->iov[i].iov_len % alignment) {
Kevin Wolf1ff735b2013-12-05 13:01:46 +01005079 return false;
5080 }
Stefan Hajnoczic53b1c52013-01-11 16:41:27 +01005081 }
5082
5083 return true;
5084}
5085
Fam Zhenge4654d22013-11-13 18:29:43 +08005086BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005087{
5088 int64_t bitmap_size;
Fam Zhenge4654d22013-11-13 18:29:43 +08005089 BdrvDirtyBitmap *bitmap;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005090
Paolo Bonzini50717e92013-01-21 17:09:45 +01005091 assert((granularity & (granularity - 1)) == 0);
5092
Fam Zhenge4654d22013-11-13 18:29:43 +08005093 granularity >>= BDRV_SECTOR_BITS;
5094 assert(granularity);
5095 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
5096 bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
5097 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5098 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5099 return bitmap;
5100}
5101
5102void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5103{
5104 BdrvDirtyBitmap *bm, *next;
5105 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5106 if (bm == bitmap) {
5107 QLIST_REMOVE(bitmap, list);
5108 hbitmap_free(bitmap->bitmap);
5109 g_free(bitmap);
5110 return;
Jan Kiszkaa55eb922009-11-30 18:21:19 +01005111 }
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005112 }
5113}
5114
Fam Zheng21b56832013-11-13 18:29:44 +08005115BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5116{
5117 BdrvDirtyBitmap *bm;
5118 BlockDirtyInfoList *list = NULL;
5119 BlockDirtyInfoList **plist = &list;
5120
5121 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5122 BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
5123 BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
5124 info->count = bdrv_get_dirty_count(bs, bm);
5125 info->granularity =
5126 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5127 entry->value = info;
5128 *plist = entry;
5129 plist = &entry->next;
5130 }
5131
5132 return list;
5133}
5134
Fam Zhenge4654d22013-11-13 18:29:43 +08005135int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005136{
Fam Zhenge4654d22013-11-13 18:29:43 +08005137 if (bitmap) {
5138 return hbitmap_get(bitmap->bitmap, sector);
lirans@il.ibm.com7cd1e322009-11-02 15:40:41 +02005139 } else {
5140 return 0;
5141 }
5142}
5143
Fam Zhenge4654d22013-11-13 18:29:43 +08005144void bdrv_dirty_iter_init(BlockDriverState *bs,
5145 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
Paolo Bonzini1755da12012-10-18 16:49:18 +02005146{
Fam Zhenge4654d22013-11-13 18:29:43 +08005147 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
Paolo Bonzini1755da12012-10-18 16:49:18 +02005148}
5149
5150void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5151 int nr_sectors)
5152{
Fam Zhenge4654d22013-11-13 18:29:43 +08005153 BdrvDirtyBitmap *bitmap;
5154 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5155 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
Paolo Bonzini8f0720e2013-01-21 17:09:41 +01005156 }
Liran Schouraaa0eb72010-01-26 10:31:48 +02005157}
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005158
Fam Zhenge4654d22013-11-13 18:29:43 +08005159void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
5160{
5161 BdrvDirtyBitmap *bitmap;
5162 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5163 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5164 }
5165}
5166
5167int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5168{
5169 return hbitmap_count(bitmap->bitmap);
5170}
5171
Fam Zheng9fcb0252013-08-23 09:14:46 +08005172/* Get a reference to bs */
5173void bdrv_ref(BlockDriverState *bs)
5174{
5175 bs->refcnt++;
5176}
5177
5178/* Release a previously grabbed reference to bs.
5179 * If after releasing, reference count is zero, the BlockDriverState is
5180 * deleted. */
5181void bdrv_unref(BlockDriverState *bs)
5182{
5183 assert(bs->refcnt > 0);
5184 if (--bs->refcnt == 0) {
5185 bdrv_delete(bs);
5186 }
5187}
5188
Marcelo Tosattidb593f22011-01-26 12:12:34 -02005189void bdrv_set_in_use(BlockDriverState *bs, int in_use)
5190{
5191 assert(bs->in_use != in_use);
5192 bs->in_use = in_use;
5193}
5194
5195int bdrv_in_use(BlockDriverState *bs)
5196{
5197 return bs->in_use;
5198}
5199
Luiz Capitulino28a72822011-09-26 17:43:50 -03005200void bdrv_iostatus_enable(BlockDriverState *bs)
5201{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005202 bs->iostatus_enabled = true;
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005203 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005204}
5205
5206/* The I/O status is only enabled if the drive explicitly
5207 * enables it _and_ the VM is configured to stop on errors */
5208bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5209{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005210 return (bs->iostatus_enabled &&
Paolo Bonzini92aa5c62012-09-28 17:22:55 +02005211 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5212 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5213 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
Luiz Capitulino28a72822011-09-26 17:43:50 -03005214}
5215
5216void bdrv_iostatus_disable(BlockDriverState *bs)
5217{
Luiz Capitulinod6bf2792011-10-14 17:11:23 -03005218 bs->iostatus_enabled = false;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005219}
5220
5221void bdrv_iostatus_reset(BlockDriverState *bs)
5222{
5223 if (bdrv_iostatus_is_enabled(bs)) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005224 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
Paolo Bonzini3bd293c2012-10-18 16:49:27 +02005225 if (bs->job) {
5226 block_job_iostatus_reset(bs->job);
5227 }
Luiz Capitulino28a72822011-09-26 17:43:50 -03005228 }
5229}
5230
Luiz Capitulino28a72822011-09-26 17:43:50 -03005231void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5232{
Paolo Bonzini3e1caa52012-09-28 17:22:57 +02005233 assert(bdrv_iostatus_is_enabled(bs));
5234 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
Luiz Capitulino58e21ef2011-10-14 17:22:24 -03005235 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5236 BLOCK_DEVICE_IO_STATUS_FAILED;
Luiz Capitulino28a72822011-09-26 17:43:50 -03005237 }
5238}
5239
Christoph Hellwiga597e792011-08-25 08:26:01 +02005240void
5241bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
5242 enum BlockAcctType type)
5243{
5244 assert(type < BDRV_MAX_IOTYPE);
5245
5246 cookie->bytes = bytes;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02005247 cookie->start_time_ns = get_clock();
Christoph Hellwiga597e792011-08-25 08:26:01 +02005248 cookie->type = type;
5249}
5250
5251void
5252bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
5253{
5254 assert(cookie->type < BDRV_MAX_IOTYPE);
5255
5256 bs->nr_bytes[cookie->type] += cookie->bytes;
5257 bs->nr_ops[cookie->type]++;
Christoph Hellwigc488c7f2011-08-25 08:26:10 +02005258 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
Christoph Hellwiga597e792011-08-25 08:26:01 +02005259}
5260
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005261void bdrv_img_create(const char *filename, const char *fmt,
5262 const char *base_filename, const char *base_fmt,
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005263 char *options, uint64_t img_size, int flags,
5264 Error **errp, bool quiet)
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005265{
5266 QEMUOptionParameter *param = NULL, *create_options = NULL;
Kevin Wolfd2208942011-06-01 14:03:31 +02005267 QEMUOptionParameter *backing_fmt, *backing_file, *size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005268 BlockDriver *drv, *proto_drv;
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005269 BlockDriver *backing_drv = NULL;
Max Reitzcc84d902013-09-06 17:14:26 +02005270 Error *local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005271 int ret = 0;
5272
5273 /* Find driver and parse its options */
5274 drv = bdrv_find_format(fmt);
5275 if (!drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005276 error_setg(errp, "Unknown file format '%s'", fmt);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005277 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005278 }
5279
Kevin Wolf98289622013-07-10 15:47:39 +02005280 proto_drv = bdrv_find_protocol(filename, true);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005281 if (!proto_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005282 error_setg(errp, "Unknown protocol '%s'", filename);
Luiz Capitulinod92ada22012-11-30 10:52:09 -02005283 return;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005284 }
5285
5286 create_options = append_option_parameters(create_options,
5287 drv->create_options);
5288 create_options = append_option_parameters(create_options,
5289 proto_drv->create_options);
5290
5291 /* Create parameter list with default values */
5292 param = parse_option_parameters("", create_options, param);
5293
5294 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
5295
5296 /* Parse -o options */
5297 if (options) {
5298 param = parse_option_parameters(options, create_options, param);
5299 if (param == NULL) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005300 error_setg(errp, "Invalid options for file format '%s'.", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005301 goto out;
5302 }
5303 }
5304
5305 if (base_filename) {
5306 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
5307 base_filename)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005308 error_setg(errp, "Backing file not supported for file format '%s'",
5309 fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005310 goto out;
5311 }
5312 }
5313
5314 if (base_fmt) {
5315 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005316 error_setg(errp, "Backing file format not supported for file "
5317 "format '%s'", fmt);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005318 goto out;
5319 }
5320 }
5321
Jes Sorensen792da932010-12-16 13:52:17 +01005322 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
5323 if (backing_file && backing_file->value.s) {
5324 if (!strcmp(filename, backing_file->value.s)) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005325 error_setg(errp, "Error: Trying to create an image with the "
5326 "same filename as the backing file");
Jes Sorensen792da932010-12-16 13:52:17 +01005327 goto out;
5328 }
5329 }
5330
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005331 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
5332 if (backing_fmt && backing_fmt->value.s) {
Stefan Hajnoczi96df67d2011-01-24 09:32:20 +00005333 backing_drv = bdrv_find_format(backing_fmt->value.s);
5334 if (!backing_drv) {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005335 error_setg(errp, "Unknown backing file format '%s'",
5336 backing_fmt->value.s);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005337 goto out;
5338 }
5339 }
5340
5341 // The size for the image must always be specified, with one exception:
5342 // If we are using a backing file, we can obtain the size from there
Kevin Wolfd2208942011-06-01 14:03:31 +02005343 size = get_option_parameter(param, BLOCK_OPT_SIZE);
5344 if (size && size->value.n == -1) {
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005345 if (backing_file && backing_file->value.s) {
Max Reitz66f6b812013-12-03 14:57:52 +01005346 BlockDriverState *bs;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005347 uint64_t size;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005348 char buf[32];
Paolo Bonzini63090da2012-04-12 14:01:03 +02005349 int back_flags;
5350
5351 /* backing files always opened read-only */
5352 back_flags =
5353 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005354
Max Reitzf67503e2014-02-18 18:33:05 +01005355 bs = NULL;
Max Reitzddf56362014-02-18 18:33:06 +01005356 ret = bdrv_open(&bs, backing_file->value.s, NULL, NULL, back_flags,
Max Reitzcc84d902013-09-06 17:14:26 +02005357 backing_drv, &local_err);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005358 if (ret < 0) {
Max Reitzcc84d902013-09-06 17:14:26 +02005359 error_setg_errno(errp, -ret, "Could not open '%s': %s",
5360 backing_file->value.s,
5361 error_get_pretty(local_err));
5362 error_free(local_err);
5363 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005364 goto out;
5365 }
5366 bdrv_get_geometry(bs, &size);
5367 size *= 512;
5368
5369 snprintf(buf, sizeof(buf), "%" PRId64, size);
5370 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
Max Reitz66f6b812013-12-03 14:57:52 +01005371
5372 bdrv_unref(bs);
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005373 } else {
Luiz Capitulino71c79812012-11-30 10:52:04 -02005374 error_setg(errp, "Image creation needs a size parameter");
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005375 goto out;
5376 }
5377 }
5378
Miroslav Rezaninaf382d432013-02-13 09:09:40 +01005379 if (!quiet) {
5380 printf("Formatting '%s', fmt=%s ", filename, fmt);
5381 print_option_parameters(param);
5382 puts("");
5383 }
Max Reitzcc84d902013-09-06 17:14:26 +02005384 ret = bdrv_create(drv, filename, param, &local_err);
5385 if (ret == -EFBIG) {
5386 /* This is generally a better message than whatever the driver would
5387 * deliver (especially because of the cluster_size_hint), since that
5388 * is most probably not much different from "image too large". */
5389 const char *cluster_size_hint = "";
5390 if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
5391 cluster_size_hint = " (try using a larger cluster size)";
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005392 }
Max Reitzcc84d902013-09-06 17:14:26 +02005393 error_setg(errp, "The image size is too large for file format '%s'"
5394 "%s", fmt, cluster_size_hint);
5395 error_free(local_err);
5396 local_err = NULL;
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005397 }
5398
5399out:
5400 free_option_parameters(create_options);
5401 free_option_parameters(param);
5402
Markus Armbruster84d18f02014-01-30 15:07:28 +01005403 if (local_err) {
Max Reitzcc84d902013-09-06 17:14:26 +02005404 error_propagate(errp, local_err);
5405 }
Jes Sorensenf88e1a42010-12-16 13:52:15 +01005406}
Stefan Hajnoczi85d126f2013-03-07 13:41:48 +01005407
5408AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5409{
5410 /* Currently BlockDriverState always uses the main loop AioContext */
5411 return qemu_get_aio_context();
5412}
Stefan Hajnoczid616b222013-06-24 17:13:10 +02005413
5414void bdrv_add_before_write_notifier(BlockDriverState *bs,
5415 NotifierWithReturn *notifier)
5416{
5417 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5418}
Max Reitz6f176b42013-09-03 10:09:50 +02005419
5420int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options)
5421{
5422 if (bs->drv->bdrv_amend_options == NULL) {
5423 return -ENOTSUP;
5424 }
5425 return bs->drv->bdrv_amend_options(bs, options);
5426}
Benoît Canetf6186f42013-10-02 14:33:48 +02005427
Benoît Canetb5042a32014-03-03 19:11:34 +01005428/* This function will be called by the bdrv_recurse_is_first_non_filter method
5429 * of block filter and by bdrv_is_first_non_filter.
5430 * It is used to test if the given bs is the candidate or recurse more in the
5431 * node graph.
Benoît Canet212a5a82014-01-23 21:31:36 +01005432 */
Benoît Canet212a5a82014-01-23 21:31:36 +01005433bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5434 BlockDriverState *candidate)
Benoît Canetf6186f42013-10-02 14:33:48 +02005435{
Benoît Canetb5042a32014-03-03 19:11:34 +01005436 /* return false if basic checks fails */
5437 if (!bs || !bs->drv) {
5438 return false;
5439 }
5440
5441 /* the code reached a non block filter driver -> check if the bs is
5442 * the same as the candidate. It's the recursion termination condition.
5443 */
5444 if (!bs->drv->is_filter) {
5445 return bs == candidate;
5446 }
5447 /* Down this path the driver is a block filter driver */
5448
5449 /* If the block filter recursion method is defined use it to recurse down
5450 * the node graph.
5451 */
5452 if (bs->drv->bdrv_recurse_is_first_non_filter) {
Benoît Canet212a5a82014-01-23 21:31:36 +01005453 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5454 }
5455
Benoît Canetb5042a32014-03-03 19:11:34 +01005456 /* the driver is a block filter but don't allow to recurse -> return false
5457 */
5458 return false;
Benoît Canet212a5a82014-01-23 21:31:36 +01005459}
5460
5461/* This function checks if the candidate is the first non filter bs down it's
5462 * bs chain. Since we don't have pointers to parents it explore all bs chains
5463 * from the top. Some filters can choose not to pass down the recursion.
5464 */
5465bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5466{
5467 BlockDriverState *bs;
5468
5469 /* walk down the bs forest recursively */
5470 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5471 bool perm;
5472
Benoît Canetb5042a32014-03-03 19:11:34 +01005473 /* try to recurse in this top level bs */
Kevin Wolfe6dc8a12014-02-04 11:45:31 +01005474 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
Benoît Canet212a5a82014-01-23 21:31:36 +01005475
5476 /* candidate is the first non filter */
5477 if (perm) {
5478 return true;
5479 }
5480 }
5481
5482 return false;
Benoît Canetf6186f42013-10-02 14:33:48 +02005483}