From 161253e2d0a83a1b33bca019c6e926013e1a03db Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:35 +0100 Subject: [PATCH 01/11] block: use 1 MB bounce buffers for crypto instead of 16KB Using 16KB bounce buffers creates a significant performance penalty for I/O to encrypted volumes on storage with high I/O latency (rotating rust & network drives), because it triggers lots of fairly small I/O operations. On tests with rotating rust, and cache=none|directsync, write speed increased from 2MiB/s to 32MiB/s, on a par with that achieved by the in-kernel luks driver. With other cache modes the in-kernel driver is still notably faster because it is able to report completion of the I/O request before any encryption is done, while the in-QEMU driver must encrypt the data before completion. Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-2-berrange@redhat.com Reviewed-by: Eric Blake Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/crypto.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index 58ef6f2f52..684cabeaf8 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -379,7 +379,11 @@ static void block_crypto_close(BlockDriverState *bs) } -#define BLOCK_CRYPTO_MAX_SECTORS 32 +/* + * 1 MB bounce buffer gives good performance / memory tradeoff + * when using cache=none|directsync. + */ +#define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024) static coroutine_fn int block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, @@ -396,12 +400,11 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, qemu_iovec_init(&hd_qiov, qiov->niov); - /* Bounce buffer so we have a linear mem region for - * entire sector. XXX optimize so we avoid bounce - * buffer in case that qiov->niov == 1 + /* Bounce buffer because we don't wish to expose cipher text + * in qiov which points to guest memory. 
*/ cipher_data = - qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512, + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE, qiov->size)); if (cipher_data == NULL) { ret = -ENOMEM; @@ -411,8 +414,8 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, while (remaining_sectors) { cur_nr_sectors = remaining_sectors; - if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) { - cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS; + if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { + cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); } qemu_iovec_reset(&hd_qiov); @@ -464,12 +467,11 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, qemu_iovec_init(&hd_qiov, qiov->niov); - /* Bounce buffer so we have a linear mem region for - * entire sector. XXX optimize so we avoid bounce - * buffer in case that qiov->niov == 1 + /* Bounce buffer because we're not permitted to touch + * contents of qiov - it points to guest memory. */ cipher_data = - qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512, + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE, qiov->size)); if (cipher_data == NULL) { ret = -ENOMEM; @@ -479,8 +481,8 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, while (remaining_sectors) { cur_nr_sectors = remaining_sectors; - if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) { - cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS; + if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { + cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); } qemu_iovec_to_buf(qiov, bytes_done, From 850f49de9b57511dcaf2cd7e45059f8f38fadf3b Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:36 +0100 Subject: [PATCH 02/11] crypto: expose encryption sector size in APIs While current encryption schemes all have a fixed sector size of 512 bytes, this is not guaranteed to be the case in future. 
Expose the sector size in the APIs so the block layer can remove assumptions about fixed 512 byte sectors. Reviewed-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-3-berrange@redhat.com Signed-off-by: Max Reitz --- crypto/block-luks.c | 6 ++++-- crypto/block-qcow.c | 1 + crypto/block.c | 6 ++++++ crypto/blockpriv.h | 1 + include/crypto/block.h | 15 +++++++++++++++ 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/crypto/block-luks.c b/crypto/block-luks.c index 36bc856084..a9062bb0f2 100644 --- a/crypto/block-luks.c +++ b/crypto/block-luks.c @@ -846,8 +846,9 @@ qcrypto_block_luks_open(QCryptoBlock *block, } } + block->sector_size = QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; block->payload_offset = luks->header.payload_offset * - QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; + block->sector_size; luks->cipher_alg = cipheralg; luks->cipher_mode = ciphermode; @@ -1240,8 +1241,9 @@ qcrypto_block_luks_create(QCryptoBlock *block, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)) * QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS); + block->sector_size = QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; block->payload_offset = luks->header.payload_offset * - QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; + block->sector_size; /* Reserve header space to match payload offset */ initfunc(block, block->payload_offset, opaque, &local_err); diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c index a456fe338b..4dd594a9ba 100644 --- a/crypto/block-qcow.c +++ b/crypto/block-qcow.c @@ -80,6 +80,7 @@ qcrypto_block_qcow_init(QCryptoBlock *block, goto fail; } + block->sector_size = QCRYPTO_BLOCK_QCOW_SECTOR_SIZE; block->payload_offset = 0; return 0; diff --git a/crypto/block.c b/crypto/block.c index c382393d9a..a7a9ad240e 100644 --- a/crypto/block.c +++ b/crypto/block.c @@ -170,6 +170,12 @@ uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block) } +uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block) +{ + return block->sector_size; +} + + void qcrypto_block_free(QCryptoBlock *block) { if 
(!block) { diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h index 0edb810e22..d227522d88 100644 --- a/crypto/blockpriv.h +++ b/crypto/blockpriv.h @@ -36,6 +36,7 @@ struct QCryptoBlock { QCryptoHashAlgorithm kdfhash; size_t niv; uint64_t payload_offset; /* In bytes */ + uint64_t sector_size; /* In bytes */ }; struct QCryptoBlockDriver { diff --git a/include/crypto/block.h b/include/crypto/block.h index f0e543bee1..13232b2472 100644 --- a/include/crypto/block.h +++ b/include/crypto/block.h @@ -240,6 +240,21 @@ QCryptoHashAlgorithm qcrypto_block_get_kdf_hash(QCryptoBlock *block); */ uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block); +/** + * qcrypto_block_get_sector_size: + * @block: the block encryption object + * + * Get the size of sectors used for payload encryption. A new + * IV is used at the start of each sector. The encryption + * sector size is not required to match the sector size of the + * underlying storage. For example LUKS will always use a 512 + * byte sector size, even if the volume is on a disk with 4k + * sectors. + * + * Returns: the sector in bytes + */ +uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block); + /** * qcrypto_block_free: * @block: the block encryption object From 31376555c7b447afb1bf9084eacbb8f566ff6b9d Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:37 +0100 Subject: [PATCH 03/11] block: fix data type casting for crypto payload offset The crypto APIs report the offset of the data payload as an uint64_t type, but the block driver is casting to size_t or ssize_t which will potentially truncate. Most of the block APIs use int64_t for offsets meanwhile, so even if using uint64_t in the crypto block driver we are still at risk of truncation. Change the block crypto driver to use uint64_t, but add asserts that the value is less than INT64_MAX. Reviewed-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Daniel P. 
Berrange Message-id: 20170927125340.12360-4-berrange@redhat.com Signed-off-by: Max Reitz --- block/crypto.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index 684cabeaf8..61f5d77bc0 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -364,8 +364,9 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset, PreallocMode prealloc, Error **errp) { BlockCrypto *crypto = bs->opaque; - size_t payload_offset = + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + assert(payload_offset < (INT64_MAX - offset)); offset += payload_offset; @@ -395,8 +396,9 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - size_t payload_offset = + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block) / 512; + assert(payload_offset < (INT64_MAX / 512)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -462,8 +464,9 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - size_t payload_offset = + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block) / 512; + assert(payload_offset < (INT64_MAX / 512)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -524,7 +527,9 @@ static int64_t block_crypto_getlength(BlockDriverState *bs) BlockCrypto *crypto = bs->opaque; int64_t len = bdrv_getlength(bs->file->bs); - ssize_t offset = qcrypto_block_get_payload_offset(crypto->block); + uint64_t offset = qcrypto_block_get_payload_offset(crypto->block); + assert(offset < INT64_MAX); + assert(offset < len); len -= offset; From a73466fbad6d48ba356940474cd72da602373304 Mon Sep 17 00:00:00 2001 From: "Daniel P. 
Berrange" Date: Wed, 27 Sep 2017 13:53:38 +0100 Subject: [PATCH 04/11] block: convert crypto driver to bdrv_co_preadv|pwritev Make the crypto driver implement the bdrv_co_preadv|pwritev callbacks, and also use bdrv_co_preadv|pwritev for I/O with the protocol driver beneath. This replaces sector based I/O with byte based I/O, and allows us to stop assuming the physical sector size matches the encryption sector size. Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-5-berrange@redhat.com Reviewed-by: Eric Blake Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/crypto.c | 106 +++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index 61f5d77bc0..965c173b01 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -387,18 +387,23 @@ static void block_crypto_close(BlockDriverState *bs) #define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024) static coroutine_fn int -block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, - int remaining_sectors, QEMUIOVector *qiov) +block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { BlockCrypto *crypto = bs->opaque; - int cur_nr_sectors; /* number of sectors in current iteration */ + uint64_t cur_bytes; /* number of bytes in current iteration */ uint64_t bytes_done = 0; uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - uint64_t payload_offset = - qcrypto_block_get_payload_offset(crypto->block) / 512; - assert(payload_offset < (INT64_MAX / 512)); + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + uint64_t sector_num = offset / sector_size; + + assert(!flags); + assert(payload_offset < INT64_MAX); + assert(QEMU_IS_ALIGNED(offset, sector_size)); + assert(QEMU_IS_ALIGNED(bytes, sector_size)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -413,37 +418,29 @@ 
block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, goto cleanup; } - while (remaining_sectors) { - cur_nr_sectors = remaining_sectors; - - if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { - cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); - } + while (bytes) { + cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE); qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512); + qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); - ret = bdrv_co_readv(bs->file, - payload_offset + sector_num, - cur_nr_sectors, &hd_qiov); + ret = bdrv_co_preadv(bs->file, payload_offset + offset + bytes_done, + cur_bytes, &hd_qiov, 0); if (ret < 0) { goto cleanup; } - if (qcrypto_block_decrypt(crypto->block, - sector_num, - cipher_data, cur_nr_sectors * 512, - NULL) < 0) { + if (qcrypto_block_decrypt(crypto->block, sector_num, cipher_data, + cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } - qemu_iovec_from_buf(qiov, bytes_done, - cipher_data, cur_nr_sectors * 512); + qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes); - remaining_sectors -= cur_nr_sectors; - sector_num += cur_nr_sectors; - bytes_done += cur_nr_sectors * 512; + sector_num += cur_bytes / sector_size; + bytes -= cur_bytes; + bytes_done += cur_bytes; } cleanup: @@ -455,18 +452,23 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, static coroutine_fn int -block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, - int remaining_sectors, QEMUIOVector *qiov) +block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { BlockCrypto *crypto = bs->opaque; - int cur_nr_sectors; /* number of sectors in current iteration */ + uint64_t cur_bytes; /* number of bytes in current iteration */ uint64_t bytes_done = 0; uint8_t *cipher_data = NULL; QEMUIOVector hd_qiov; int ret = 0; - uint64_t payload_offset = - qcrypto_block_get_payload_offset(crypto->block) / 512; - assert(payload_offset < (INT64_MAX / 
512)); + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); + uint64_t sector_num = offset / sector_size; + + assert(!flags); + assert(payload_offset < INT64_MAX); + assert(QEMU_IS_ALIGNED(offset, sector_size)); + assert(QEMU_IS_ALIGNED(bytes, sector_size)); qemu_iovec_init(&hd_qiov, qiov->niov); @@ -481,37 +483,29 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, goto cleanup; } - while (remaining_sectors) { - cur_nr_sectors = remaining_sectors; + while (bytes) { + cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE); - if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) { - cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512); - } + qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes); - qemu_iovec_to_buf(qiov, bytes_done, - cipher_data, cur_nr_sectors * 512); - - if (qcrypto_block_encrypt(crypto->block, - sector_num, - cipher_data, cur_nr_sectors * 512, - NULL) < 0) { + if (qcrypto_block_encrypt(crypto->block, sector_num, cipher_data, + cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } qemu_iovec_reset(&hd_qiov); - qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512); + qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); - ret = bdrv_co_writev(bs->file, - payload_offset + sector_num, - cur_nr_sectors, &hd_qiov); + ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done, + cur_bytes, &hd_qiov, 0); if (ret < 0) { goto cleanup; } - remaining_sectors -= cur_nr_sectors; - sector_num += cur_nr_sectors; - bytes_done += cur_nr_sectors * 512; + sector_num += cur_bytes / sector_size; + bytes -= cur_bytes; + bytes_done += cur_bytes; } cleanup: @@ -521,6 +515,13 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, return ret; } +static void block_crypto_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); + 
bs->bl.request_alignment = sector_size; /* No sub-sector I/O */ +} + static int64_t block_crypto_getlength(BlockDriverState *bs) { @@ -620,8 +621,9 @@ BlockDriver bdrv_crypto_luks = { .bdrv_truncate = block_crypto_truncate, .create_opts = &block_crypto_create_opts_luks, - .bdrv_co_readv = block_crypto_co_readv, - .bdrv_co_writev = block_crypto_co_writev, + .bdrv_refresh_limits = block_crypto_refresh_limits, + .bdrv_co_preadv = block_crypto_co_preadv, + .bdrv_co_pwritev = block_crypto_co_pwritev, .bdrv_getlength = block_crypto_getlength, .bdrv_get_info = block_crypto_get_info_luks, .bdrv_get_specific_info = block_crypto_get_specific_info_luks, From 4609742a495d98ac358098e10d91890185dcdc60 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:39 +0100 Subject: [PATCH 05/11] block: convert qcrypto_block_encrypt|decrypt to take bytes offset Instead of sector offset, take the bytes offset when encrypting or decrypting data. Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-6-berrange@redhat.com Reviewed-by: Eric Blake Reviewed-by: Max Reitz Signed-off-by: Max Reitz --- block/crypto.c | 12 ++++-------- block/qcow.c | 11 +++++++---- block/qcow2-cluster.c | 8 +++----- block/qcow2.c | 4 ++-- crypto/block-luks.c | 12 ++++++++---- crypto/block-qcow.c | 12 ++++++++---- crypto/block.c | 20 ++++++++++++++------ crypto/blockpriv.h | 4 ++-- include/crypto/block.h | 14 ++++++++------ 9 files changed, 56 insertions(+), 41 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index 965c173b01..edf53d49d1 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -398,7 +398,6 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, int ret = 0; uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - uint64_t sector_num = offset / sector_size; assert(!flags); assert(payload_offset < INT64_MAX); @@ -430,15 +429,14 @@ 
block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, goto cleanup; } - if (qcrypto_block_decrypt(crypto->block, sector_num, cipher_data, - cur_bytes, NULL) < 0) { + if (qcrypto_block_decrypt(crypto->block, offset + bytes_done, + cipher_data, cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes); - sector_num += cur_bytes / sector_size; bytes -= cur_bytes; bytes_done += cur_bytes; } @@ -463,7 +461,6 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, int ret = 0; uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - uint64_t sector_num = offset / sector_size; assert(!flags); assert(payload_offset < INT64_MAX); @@ -488,8 +485,8 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes); - if (qcrypto_block_encrypt(crypto->block, sector_num, cipher_data, - cur_bytes, NULL) < 0) { + if (qcrypto_block_encrypt(crypto->block, offset + bytes_done, + cipher_data, cur_bytes, NULL) < 0) { ret = -EIO; goto cleanup; } @@ -503,7 +500,6 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, goto cleanup; } - sector_num += cur_bytes / sector_size; bytes -= cur_bytes; bytes_done += cur_bytes; } diff --git a/block/qcow.c b/block/qcow.c index f450b00cfc..9569deeaf0 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -478,7 +478,9 @@ static int get_cluster_offset(BlockDriverState *bs, for(i = 0; i < s->cluster_sectors; i++) { if (i < n_start || i >= n_end) { memset(s->cluster_data, 0x00, 512); - if (qcrypto_block_encrypt(s->crypto, start_sect + i, + if (qcrypto_block_encrypt(s->crypto, + (start_sect + i) * + BDRV_SECTOR_SIZE, s->cluster_data, BDRV_SECTOR_SIZE, NULL) < 0) { @@ -668,7 +670,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, } if 
(bs->encrypted) { assert(s->crypto); - if (qcrypto_block_decrypt(s->crypto, sector_num, buf, + if (qcrypto_block_decrypt(s->crypto, + sector_num * BDRV_SECTOR_SIZE, buf, n * BDRV_SECTOR_SIZE, NULL) < 0) { ret = -EIO; break; @@ -740,8 +743,8 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, } if (bs->encrypted) { assert(s->crypto); - if (qcrypto_block_encrypt(s->crypto, sector_num, buf, - n * BDRV_SECTOR_SIZE, NULL) < 0) { + if (qcrypto_block_encrypt(s->crypto, sector_num * BDRV_SECTOR_SIZE, + buf, n * BDRV_SECTOR_SIZE, NULL) < 0) { ret = -EIO; break; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index d2518d1893..0e5aec81cb 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -446,15 +446,13 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, { if (bytes && bs->encrypted) { BDRVQcow2State *s = bs->opaque; - int64_t sector = (s->crypt_physical_offset ? + int64_t offset = (s->crypt_physical_offset ? (cluster_offset + offset_in_cluster) : - (src_cluster_offset + offset_in_cluster)) - >> BDRV_SECTOR_BITS; + (src_cluster_offset + offset_in_cluster)); assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); assert((bytes & ~BDRV_SECTOR_MASK) == 0); assert(s->crypto); - if (qcrypto_block_encrypt(s->crypto, sector, buffer, - bytes, NULL) < 0) { + if (qcrypto_block_encrypt(s->crypto, offset, buffer, bytes, NULL) < 0) { return false; } } diff --git a/block/qcow2.c b/block/qcow2.c index b8da8ca105..33597394b5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1811,7 +1811,7 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, if (qcrypto_block_decrypt(s->crypto, (s->crypt_physical_offset ? cluster_offset + offset_in_cluster : - offset) >> BDRV_SECTOR_BITS, + offset), cluster_data, cur_bytes, NULL) < 0) { @@ -1946,7 +1946,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, if (qcrypto_block_encrypt(s->crypto, (s->crypt_physical_offset ? 
cluster_offset + offset_in_cluster : - offset) >> BDRV_SECTOR_BITS, + offset), cluster_data, cur_bytes, NULL) < 0) { ret = -EIO; diff --git a/crypto/block-luks.c b/crypto/block-luks.c index a9062bb0f2..d418ac30b8 100644 --- a/crypto/block-luks.c +++ b/crypto/block-luks.c @@ -1399,29 +1399,33 @@ static void qcrypto_block_luks_cleanup(QCryptoBlock *block) static int qcrypto_block_luks_decrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); return qcrypto_block_decrypt_helper(block->cipher, block->niv, block->ivgen, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, - startsector, buf, len, errp); + offset, buf, len, errp); } static int qcrypto_block_luks_encrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)); return qcrypto_block_encrypt_helper(block->cipher, block->niv, block->ivgen, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, - startsector, buf, len, errp); + offset, buf, len, errp); } diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c index 4dd594a9ba..8817d6aaa7 100644 --- a/crypto/block-qcow.c +++ b/crypto/block-qcow.c @@ -143,29 +143,33 @@ qcrypto_block_qcow_cleanup(QCryptoBlock *block) static int qcrypto_block_qcow_decrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); return qcrypto_block_decrypt_helper(block->cipher, block->niv, block->ivgen, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE, - startsector, buf, len, errp); + offset, buf, len, errp); } static int qcrypto_block_qcow_encrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t 
offset, uint8_t *buf, size_t len, Error **errp) { + assert(QEMU_IS_ALIGNED(offset, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(len, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE)); return qcrypto_block_encrypt_helper(block->cipher, block->niv, block->ivgen, QCRYPTO_BLOCK_QCOW_SECTOR_SIZE, - startsector, buf, len, errp); + offset, buf, len, errp); } diff --git a/crypto/block.c b/crypto/block.c index a7a9ad240e..f206d5eea8 100644 --- a/crypto/block.c +++ b/crypto/block.c @@ -127,22 +127,22 @@ QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block, int qcrypto_block_decrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { - return block->driver->decrypt(block, startsector, buf, len, errp); + return block->driver->decrypt(block, offset, buf, len, errp); } int qcrypto_block_encrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { - return block->driver->encrypt(block, startsector, buf, len, errp); + return block->driver->encrypt(block, offset, buf, len, errp); } @@ -194,13 +194,17 @@ int qcrypto_block_decrypt_helper(QCryptoCipher *cipher, size_t niv, QCryptoIVGen *ivgen, int sectorsize, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { uint8_t *iv; int ret = -1; + uint64_t startsector = offset / sectorsize; + + assert(QEMU_IS_ALIGNED(offset, sectorsize)); + assert(QEMU_IS_ALIGNED(len, sectorsize)); iv = niv ? g_new0(uint8_t, niv) : NULL; @@ -243,13 +247,17 @@ int qcrypto_block_encrypt_helper(QCryptoCipher *cipher, size_t niv, QCryptoIVGen *ivgen, int sectorsize, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp) { uint8_t *iv; int ret = -1; + uint64_t startsector = offset / sectorsize; + + assert(QEMU_IS_ALIGNED(offset, sectorsize)); + assert(QEMU_IS_ALIGNED(len, sectorsize)); iv = niv ? 
g_new0(uint8_t, niv) : NULL; diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h index d227522d88..41840abcec 100644 --- a/crypto/blockpriv.h +++ b/crypto/blockpriv.h @@ -82,7 +82,7 @@ int qcrypto_block_decrypt_helper(QCryptoCipher *cipher, size_t niv, QCryptoIVGen *ivgen, int sectorsize, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp); @@ -91,7 +91,7 @@ int qcrypto_block_encrypt_helper(QCryptoCipher *cipher, size_t niv, QCryptoIVGen *ivgen, int sectorsize, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp); diff --git a/include/crypto/block.h b/include/crypto/block.h index 13232b2472..cd18f46d56 100644 --- a/include/crypto/block.h +++ b/include/crypto/block.h @@ -161,18 +161,19 @@ QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block, /** * @qcrypto_block_decrypt: * @block: the block encryption object - * @startsector: the sector from which @buf was read + * @offset: the position at which @iov was read * @buf: the buffer to decrypt * @len: the length of @buf in bytes * @errp: pointer to a NULL-initialized error object * * Decrypt @len bytes of cipher text in @buf, writing - * plain text back into @buf + * plain text back into @buf. @len and @offset must be + * a multiple of the encryption format sector size. * * Returns 0 on success, -1 on failure */ int qcrypto_block_decrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp); @@ -180,18 +181,19 @@ int qcrypto_block_decrypt(QCryptoBlock *block, /** * @qcrypto_block_encrypt: * @block: the block encryption object - * @startsector: the sector to which @buf will be written + * @offset: the position at which @iov will be written * @buf: the buffer to decrypt * @len: the length of @buf in bytes * @errp: pointer to a NULL-initialized error object * * Encrypt @len bytes of plain text in @buf, writing - * cipher text back into @buf + * cipher text back into @buf. 
@len and @offset must be + * a multiple of the encryption format sector size. * * Returns 0 on success, -1 on failure */ int qcrypto_block_encrypt(QCryptoBlock *block, - uint64_t startsector, + uint64_t offset, uint8_t *buf, size_t len, Error **errp); From d67a6b09b4ac27a4fac07544ded79b40d2717a0d Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 27 Sep 2017 13:53:40 +0100 Subject: [PATCH 06/11] block: support passthrough of BDRV_REQ_FUA in crypto driver The BDRV_REQ_FUA flag can trivially be allowed in the crypto driver as a passthrough to the underlying block driver. Reviewed-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange Message-id: 20170927125340.12360-7-berrange@redhat.com Signed-off-by: Max Reitz --- block/crypto.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/crypto.c b/block/crypto.c index edf53d49d1..60ddf8623e 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -279,6 +279,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, return -EINVAL; } + bs->supported_write_flags = BDRV_REQ_FUA & + bs->file->bs->supported_write_flags; + opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { @@ -462,7 +465,7 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block); uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block); - assert(!flags); + assert(!(flags & ~BDRV_REQ_FUA)); assert(payload_offset < INT64_MAX); assert(QEMU_IS_ALIGNED(offset, sector_size)); assert(QEMU_IS_ALIGNED(bytes, sector_size)); @@ -495,7 +498,7 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, qemu_iovec_add(&hd_qiov, cipher_data, cur_bytes); ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done, - cur_bytes, &hd_qiov, flags); if (ret < 0) { goto cleanup; }
From 18775ff32697ab6e1fd47989673bf1de54d0d942 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Thu, 28 Sep 2017 15:03:00 +0300 Subject: [PATCH 07/11] block/mirror: check backing in bdrv_mirror_top_refresh_filename Backing may be zero after failed bdrv_attach_child in bdrv_set_backing_hd, which leads to SIGSEGV. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-id: 20170928120300.58164-1-vsementsov@virtuozzo.com Reviewed-by: John Snow Signed-off-by: Max Reitz --- block/mirror.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/mirror.c b/block/mirror.c index 459b80f8f3..3b6f0c5772 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1058,6 +1058,11 @@ static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts) { + if (bs->backing == NULL) { + /* we can be here after failed bdrv_attach_child in + * bdrv_set_backing_hd */ + return; + } bdrv_refresh_filename(bs->backing->bs); pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->backing->bs->filename); From 47500c6775813c8f2b5a5de04d84222f3cecc62d Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 27 Sep 2017 23:13:34 +0200 Subject: [PATCH 08/11] iotests: Fix 195 if IMGFMT is part of TEST_DIR do_run_qemu() in iotest 195 first applies _filter_imgfmt when printing qemu's command line and _filter_testdir only afterwards. Therefore, if the image format is part of the test directory path, _filter_testdir will no longer apply and the actual output will differ from the reference output even in case of success. For example, TEST_DIR might be "/tmp/test-qcow2", in which case _filter_imgfmt first transforms this to "/tmp/test-IMGFMT" which is no longer recognized as the TEST_DIR by _filter_testdir. Fix this by not applying _filter_imgfmt in do_run_qemu() but in run_qemu() instead, and only after _filter_testdir. 
Signed-off-by: Max Reitz Message-id: 20170927211334.3988-1-mreitz@redhat.com Reviewed-by: Eric Blake Signed-off-by: Max Reitz --- tests/qemu-iotests/195 | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/195 b/tests/qemu-iotests/195 index 05a239cbf5..e7a403ded2 100755 --- a/tests/qemu-iotests/195 +++ b/tests/qemu-iotests/195 @@ -44,15 +44,16 @@ _supported_os Linux function do_run_qemu() { - echo Testing: "$@" | _filter_imgfmt + echo Testing: "$@" $QEMU -nographic -qmp-pretty stdio -serial none "$@" echo } function run_qemu() { - do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp \ - | _filter_qemu_io | _filter_generated_node_ids + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_imgfmt | _filter_qemu \ + | _filter_qmp | _filter_qemu_io \ + | _filter_generated_node_ids } size=64M From 76a2a30a99c670e9ec1b4a5d976868059c6bc258 Mon Sep 17 00:00:00 2001 From: Pavel Butsykin Date: Fri, 29 Sep 2017 15:16:12 +0300 Subject: [PATCH 09/11] qcow2: fix return error code in qcow2_truncate() Signed-off-by: Pavel Butsykin Reviewed-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Max Reitz Message-id: 20170929121613.25997-2-pbutsykin@virtuozzo.com Signed-off-by: Max Reitz --- block/qcow2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 33597394b5..960b3ab977 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3167,7 +3167,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (old_file_size < 0) { error_setg_errno(errp, -old_file_size, "Failed to inquire current file length"); - return ret; + return old_file_size; } nb_new_data_clusters = DIV_ROUND_UP(offset - old_length, @@ -3196,7 +3196,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, if (allocation_start < 0) { error_setg_errno(errp, -allocation_start, "Failed to resize refcount structures"); - return -allocation_start; + return allocation_start; } clusters_allocated = 
qcow2_alloc_clusters_at(bs, allocation_start, From 163bc39d2c2921430e5c23f4d0a0966d62f67a02 Mon Sep 17 00:00:00 2001 From: Pavel Butsykin Date: Fri, 29 Sep 2017 15:16:13 +0300 Subject: [PATCH 10/11] qcow2: truncate the tail of the image file after shrinking the image Now after shrinking the image, at the end of the image file, there might be a tail that probably will never be used. So we can find the last used cluster and cut the tail. Signed-off-by: Pavel Butsykin Reviewed-by: John Snow Message-id: 20170929121613.25997-3-pbutsykin@virtuozzo.com Signed-off-by: Max Reitz --- block/qcow2-refcount.c | 22 ++++++++++++++++++++++ block/qcow2.c | 23 +++++++++++++++++++++++ block/qcow2.h | 1 + 3 files changed, 46 insertions(+) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 88d5a3f1ad..aa3fd6cf17 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -3181,3 +3181,25 @@ out: g_free(reftable_tmp); return ret; } + +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size) +{ + BDRVQcow2State *s = bs->opaque; + int64_t i; + + for (i = size_to_clusters(s, size) - 1; i >= 0; i--) { + uint64_t refcount; + int ret = qcow2_get_refcount(bs, i, &refcount); + if (ret < 0) { + fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", + i, strerror(-ret)); + return ret; + } + if (refcount > 0) { + return i; + } + } + qcow2_signal_corruption(bs, true, -1, -1, + "There are no references in the refcount table."); + return -EIO; +} diff --git a/block/qcow2.c b/block/qcow2.c index 960b3ab977..f63d1831f8 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3107,6 +3107,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, new_l1_size = size_to_l1(s, offset); if (offset < old_length) { + int64_t last_cluster, old_file_size; if (prealloc != PREALLOC_MODE_OFF) { error_setg(errp, "Preallocation can't be used for shrinking an image"); @@ -3135,6 +3136,28 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset, "Failed to 
discard unused refblocks"); return ret; } + + old_file_size = bdrv_getlength(bs->file->bs); + if (old_file_size < 0) { + error_setg_errno(errp, -old_file_size, + "Failed to inquire current file length"); + return old_file_size; + } + last_cluster = qcow2_get_last_cluster(bs, old_file_size); + if (last_cluster < 0) { + error_setg_errno(errp, -last_cluster, + "Failed to find the last cluster"); + return last_cluster; + } + if ((last_cluster + 1) * s->cluster_size < old_file_size) { + ret = bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size, + PREALLOC_MODE_OFF, NULL); + if (ret < 0) { + warn_report("Failed to truncate the tail of the image: %s", + strerror(-ret)); + ret = 0; + } + } } else { ret = qcow2_grow_l1_table(bs, new_l1_size, true); if (ret < 0) { diff --git a/block/qcow2.h b/block/qcow2.h index 5a289a81e2..782a206ecb 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -597,6 +597,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, BlockDriverAmendStatusCB *status_cb, void *cb_opaque, Error **errp); int qcow2_shrink_reftable(BlockDriverState *bs); +int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, From ce960aa9062a407d0ca15aee3dcd3bd84a4e24f9 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 29 Sep 2017 18:22:55 +0300 Subject: [PATCH 11/11] block/mirror: check backing in bdrv_mirror_top_flush Backing may be zero after failed bdrv_append in mirror_start_job, which leads to SIGSEGV. 
Signed-off-by: Vladimir Sementsov-Ogievskiy Message-id: 20170929152255.5431-1-vsementsov@virtuozzo.com Signed-off-by: Max Reitz --- block/mirror.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/mirror.c b/block/mirror.c index 3b6f0c5772..153758ca9f 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1041,6 +1041,10 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) { + if (bs->backing == NULL) { + /* we can be here after failed bdrv_append in mirror_start_job */ + return 0; + } return bdrv_co_flush(bs->backing->bs); }