From 1466ef6cbe26a55e9bd5d4c3d9f58c793e1eb2c4 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 15 Mar 2022 00:32:24 +0300 Subject: [PATCH 01/12] qapi: rename BlockDirtyBitmapMergeSource to BlockDirtyBitmapOrStr Rename the type to be reused. Old name is "what is it for". To be natively reused for other needs, let's name it exactly "what is it". Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20220314213226.362217-2-v.sementsov-og@mail.ru> [eblake: Adjust S-o-b to Vladimir's new email, with permission] Reviewed-by: Eric Blake Acked-by: John Snow Signed-off-by: Eric Blake --- block/monitor/bitmap-qmp-cmds.c | 6 +++--- include/block/block_int-global-state.h | 2 +- qapi/block-core.json | 6 +++--- qemu-img.c | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c index 8e35616c2e..2b677c4a2f 100644 --- a/block/monitor/bitmap-qmp-cmds.c +++ b/block/monitor/bitmap-qmp-cmds.c @@ -257,12 +257,12 @@ void qmp_block_dirty_bitmap_disable(const char *node, const char *name, } BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, - BlockDirtyBitmapMergeSourceList *bms, + BlockDirtyBitmapOrStrList *bms, HBitmap **backup, Error **errp) { BlockDriverState *bs; BdrvDirtyBitmap *dst, *src, *anon; - BlockDirtyBitmapMergeSourceList *lst; + BlockDirtyBitmapOrStrList *lst; GLOBAL_STATE_CODE(); @@ -317,7 +317,7 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, } void qmp_block_dirty_bitmap_merge(const char *node, const char *target, - BlockDirtyBitmapMergeSourceList *bitmaps, + BlockDirtyBitmapOrStrList *bitmaps, Error **errp) { block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp); diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h index 0f21b0570b..8b2e95f5ff 100644 --- a/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h @@ -262,7 +262,7 @@ BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, BlockDriverState **pbs, Error **errp); BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, - BlockDirtyBitmapMergeSourceList *bms, + BlockDirtyBitmapOrStrList *bms, HBitmap **backup, Error **errp); BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, bool release, diff --git a/qapi/block-core.json b/qapi/block-core.json index beeb91952a..b66494e8c5 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2079,7 +2079,7 @@ '*persistent': 'bool', '*disabled': 'bool' } } ## -# @BlockDirtyBitmapMergeSource: +# @BlockDirtyBitmapOrStr: # # @local: name of the bitmap, attached to the same node as target bitmap. # @@ -2087,7 +2087,7 @@ # # Since: 4.1 ## -{ 'alternate': 'BlockDirtyBitmapMergeSource', +{ 'alternate': 'BlockDirtyBitmapOrStr', 'data': { 'local': 'str', 'external': 'BlockDirtyBitmap' } } @@ -2106,7 +2106,7 @@ ## { 'struct': 'BlockDirtyBitmapMerge', 'data': { 'node': 'str', 'target': 'str', - 'bitmaps': ['BlockDirtyBitmapMergeSource'] } } + 'bitmaps': ['BlockDirtyBitmapOrStr'] } } ## # @block-dirty-bitmap-add: diff --git a/qemu-img.c b/qemu-img.c index 4c84134a1e..4cf4d2423d 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1623,16 +1623,16 @@ static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name, const char *src_node, const char *src_name, Error **errp) { - BlockDirtyBitmapMergeSource *merge_src; - BlockDirtyBitmapMergeSourceList *list = NULL; + BlockDirtyBitmapOrStr *merge_src; + BlockDirtyBitmapOrStrList *list = NULL; - merge_src = g_new0(BlockDirtyBitmapMergeSource, 1); + merge_src = g_new0(BlockDirtyBitmapOrStr, 1); merge_src->type = QTYPE_QDICT; merge_src->u.external.node = g_strdup(src_node); merge_src->u.external.name = g_strdup(src_name); QAPI_LIST_PREPEND(list, merge_src); qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp); - qapi_free_BlockDirtyBitmapMergeSourceList(list); + qapi_free_BlockDirtyBitmapOrStrList(list); } enum ImgConvertBlockStatus { From e5fb29d5d001dd5f300ddb4ad48e11c3ab2d35ec Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 15 Mar 2022 00:32:25 +0300 Subject: [PATCH 02/12] qapi: nbd-export: allow select bitmaps by node/name pair Hi all! Current logic of relying on search through backing chain is not safe neither convenient. Sometimes it leads to necessity of extra bitmap copying. Also, we are going to add "snapshot-access" driver, to access some snapshot state through NBD. And this driver is not formally a filter, and of course it's not a COW format driver. So, searching through backing chain will not work. Instead of widening the workaround of bitmap searching, let's extend the interface so that user can select bitmap precisely. Note, that checking for bitmap active status is not copied to the new API, I don't see a reason for it, user should understand the risks. And anyway, bitmap from other node is unrelated to this export being read-only or read-write. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20220314213226.362217-3-v.sementsov-og@mail.ru> [eblake: Adjust S-o-b to Vladimir's new email, with permission] Reviewed-by: Eric Blake Signed-off-by: Eric Blake --- blockdev-nbd.c | 8 +++++- nbd/server.c | 63 +++++++++++++++++++++++++++--------------- qapi/block-export.json | 5 +++- qemu-nbd.c | 11 ++++++-- 4 files changed, 61 insertions(+), 26 deletions(-) diff --git a/blockdev-nbd.c b/blockdev-nbd.c index 9840d25a82..7f6531cba0 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -211,8 +211,14 @@ void qmp_nbd_server_add(NbdServerAddOptions *arg, Error **errp) QAPI_CLONE_MEMBERS(BlockExportOptionsNbdBase, &export_opts->u.nbd, qapi_NbdServerAddOptions_base(arg)); if (arg->has_bitmap) { + BlockDirtyBitmapOrStr *el = g_new(BlockDirtyBitmapOrStr, 1); + + *el = (BlockDirtyBitmapOrStr) { + .type = QTYPE_QSTRING, + .u.local = g_strdup(arg->bitmap), + }; export_opts->u.nbd.has_bitmaps = true; - QAPI_LIST_PREPEND(export_opts->u.nbd.bitmaps, g_strdup(arg->bitmap)); + QAPI_LIST_PREPEND(export_opts->u.nbd.bitmaps, el); } /* diff --git a/nbd/server.c b/nbd/server.c index c5644fd3f6..4cdbc062c1 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1643,7 +1643,7 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, uint64_t perm, shared_perm; bool readonly = !exp_args->writable; bool shared = !exp_args->writable; - strList *bitmaps; + BlockDirtyBitmapOrStrList *bitmaps; size_t i; int ret; @@ -1709,40 +1709,59 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, } exp->export_bitmaps = g_new0(BdrvDirtyBitmap *, exp->nr_export_bitmaps); for (i = 0, bitmaps = arg->bitmaps; bitmaps; - i++, bitmaps = bitmaps->next) { - const char *bitmap = bitmaps->value; + i++, bitmaps = bitmaps->next) + { + const char *bitmap; BlockDriverState *bs = blk_bs(blk); BdrvDirtyBitmap *bm = NULL; - while (bs) { - bm = bdrv_find_dirty_bitmap(bs, bitmap); - if (bm != NULL) { - break; + switch (bitmaps->value->type) { + case QTYPE_QSTRING: + bitmap = bitmaps->value->u.local; + while (bs) { + bm = bdrv_find_dirty_bitmap(bs, bitmap); + if (bm != NULL) { + break; + } + + bs = bdrv_filter_or_cow_bs(bs); } - bs = bdrv_filter_or_cow_bs(bs); + if (bm == NULL) { + ret = -ENOENT; + error_setg(errp, "Bitmap '%s' is not found", + bitmaps->value->u.local); + goto fail; + } + + if (readonly && bdrv_is_writable(bs) && + bdrv_dirty_bitmap_enabled(bm)) { + ret = -EINVAL; + error_setg(errp, "Enabled bitmap '%s' incompatible with " + "readonly export", bitmap); + goto fail; + } + break; + case QTYPE_QDICT: + bitmap = bitmaps->value->u.external.name; + bm = block_dirty_bitmap_lookup(bitmaps->value->u.external.node, + bitmap, NULL, errp); + if (!bm) { + ret = -ENOENT; + goto fail; + } + break; + default: + abort(); } - if (bm == NULL) { - ret = -ENOENT; - error_setg(errp, "Bitmap '%s' is not found", bitmap); - goto fail; - } + assert(bm); if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) { ret = -EINVAL; goto fail; } - if (readonly && bdrv_is_writable(bs) && - bdrv_dirty_bitmap_enabled(bm)) { - ret = -EINVAL; - error_setg(errp, - "Enabled bitmap '%s' incompatible with readonly export", - bitmap); - goto fail; - } - exp->export_bitmaps[i] = bm; assert(strlen(bitmap) <= BDRV_BITMAP_MAX_NAME_SIZE); } diff --git a/qapi/block-export.json b/qapi/block-export.json index 1e34927f85..1de16d2589 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -6,6 +6,7 @@ ## { 'include': 'sockets.json' } +{ 'include': 'block-core.json' } ## # @NbdServerOptions: @@ -89,6 +90,7 @@ # @device, so the NBD client can use NBD_OPT_SET_META_CONTEXT with # the metadata context name "qemu:dirty-bitmap:BITMAP" to inspect # each bitmap. +# Since 7.1 bitmap may be specified by node/name pair. # # @allocation-depth: Also export the allocation depth map for @device, so # the NBD client can use NBD_OPT_SET_META_CONTEXT with @@ -99,7 +101,8 @@ ## { 'struct': 'BlockExportOptionsNbd', 'base': 'BlockExportOptionsNbdBase', - 'data': { '*bitmaps': ['str'], '*allocation-depth': 'bool' } } + 'data': { '*bitmaps': ['BlockDirtyBitmapOrStr'], + '*allocation-depth': 'bool' } } ## # @BlockExportOptionsVhostUserBlk: diff --git a/qemu-nbd.c b/qemu-nbd.c index 397ffa64d7..db63980df1 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -567,7 +567,7 @@ int main(int argc, char **argv) QDict *options = NULL; const char *export_name = NULL; /* defaults to "" later for server mode */ const char *export_description = NULL; - strList *bitmaps = NULL; + BlockDirtyBitmapOrStrList *bitmaps = NULL; bool alloc_depth = false; const char *tlscredsid = NULL; const char *tlshostname = NULL; @@ -687,7 +687,14 @@ int main(int argc, char **argv) alloc_depth = true; break; case 'B': - QAPI_LIST_PREPEND(bitmaps, g_strdup(optarg)); + { + BlockDirtyBitmapOrStr *el = g_new(BlockDirtyBitmapOrStr, 1); + *el = (BlockDirtyBitmapOrStr) { + .type = QTYPE_QSTRING, + .u.local = g_strdup(optarg), + }; + QAPI_LIST_PREPEND(bitmaps, el); + } break; case 'k': sockpath = optarg; From c08c220be70603941b680d5b39f2061222dd74e8 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 15 Mar 2022 00:32:26 +0300 Subject: [PATCH 03/12] iotests/223: check new possibility of exporting bitmaps by node/name Add simple test that new interface introduced in previous commit works. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20220314213226.362217-4-v.sementsov-og@mail.ru> [eblake: Adjust S-o-b to Vladimir's new email, with permission] Reviewed-by: Eric Blake Signed-off-by: Eric Blake --- tests/qemu-iotests/223 | 16 +++++++++++++ tests/qemu-iotests/223.out | 47 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/223 b/tests/qemu-iotests/223 index da87f2f4a2..0bbb283010 100755 --- a/tests/qemu-iotests/223 +++ b/tests/qemu-iotests/223 @@ -120,6 +120,11 @@ _send_qemu_cmd $QEMU_HANDLE '{"execute":"blockdev-add", "file":{"driver":"file", "filename":"'"$TEST_IMG"'"}}}' "return" _send_qemu_cmd $QEMU_HANDLE '{"execute":"block-dirty-bitmap-disable", "arguments":{"node":"n", "name":"b"}}' "return" +_send_qemu_cmd $QEMU_HANDLE '{"execute":"blockdev-add", + "arguments":{"driver":"null-co", "node-name":"null", + "size": 4194304}}' "return" +_send_qemu_cmd $QEMU_HANDLE '{"execute":"block-dirty-bitmap-add", + "arguments":{"node":"null", "name":"b3"}}' "return" for attempt in normal iothread; do @@ -155,6 +160,9 @@ _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add", _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add", "arguments":{"device":"n", "name":"n2", "writable":true, "description":"some text", "bitmap":"b2"}}' "return" +_send_qemu_cmd $QEMU_HANDLE '{"execute":"block-export-add", + "arguments":{"type": "nbd", "node-name":"n", "id":"n3", "name": "n3", + "bitmaps":[{"node":"null","name":"b3"}]}}' "return" $QEMU_NBD_PROG -L -k "$SOCK_DIR/nbd" echo @@ -178,6 +186,14 @@ IMG="driver=nbd,export=n2,server.type=unix,server.path=$SOCK_DIR/nbd" $QEMU_IMG map --output=json --image-opts \ "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b2" | _filter_qemu_img_map +echo +echo "=== Check bitmap taken from another node ===" +echo + +IMG="driver=nbd,export=n3,server.type=unix,server.path=$SOCK_DIR/nbd" +$QEMU_IMG map --output=json --image-opts \ + "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b3" | _filter_qemu_img_map + echo echo "=== End qemu NBD server ===" echo diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out index e58ea5abbd..0647941531 100644 --- a/tests/qemu-iotests/223.out +++ b/tests/qemu-iotests/223.out @@ -33,6 +33,13 @@ wrote 2097152/2097152 bytes at offset 2097152 {"execute":"block-dirty-bitmap-disable", "arguments":{"node":"n", "name":"b"}} {"return": {}} +{"execute":"blockdev-add", + "arguments":{"driver":"null-co", "node-name":"null", + "size": 4194304}} +{"return": {}} +{"execute":"block-dirty-bitmap-add", + "arguments":{"node":"null", "name":"b3"}} +{"return": {}} === Set up NBD with normal access === @@ -69,7 +76,11 @@ exports available: 0 "arguments":{"device":"n", "name":"n2", "writable":true, "description":"some text", "bitmap":"b2"}} {"return": {}} -exports available: 2 +{"execute":"block-export-add", + "arguments":{"type": "nbd", "node-name":"n", "id":"n3", "name": "n3", + "bitmaps":[{"node":"null","name":"b3"}]}} +{"return": {}} +exports available: 3 export: 'n' size: 4194304 flags: 0x58f ( readonly flush fua df multi cache ) @@ -89,6 +100,15 @@ exports available: 2 available meta contexts: 2 base:allocation qemu:dirty-bitmap:b2 + export: 'n3' + size: 4194304 + flags: 0x58f ( readonly flush fua df multi cache ) + min block: 1 + opt block: 4096 + max block: 33554432 + available meta contexts: 2 + base:allocation + qemu:dirty-bitmap:b3 === Contrast normal status to large granularity dirty-bitmap === @@ -114,6 +134,10 @@ read 2097152/2097152 bytes at offset 2097152 { "start": 1024, "length": 2096128, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, { "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false}] +=== Check bitmap taken from another node === + +[{ "start": 0, "length": 4194304, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}] + === End qemu NBD server === {"execute":"nbd-server-remove", @@ -128,6 +152,7 @@ read 2097152/2097152 bytes at offset 2097152 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} {"error": {"class": "GenericError", "desc": "Export 'n2' is not found"}} {"execute":"nbd-server-stop"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n3"}} {"return": {}} {"execute":"nbd-server-stop"} {"error": {"class": "GenericError", "desc": "NBD server not running"}} @@ -170,7 +195,11 @@ exports available: 0 "arguments":{"device":"n", "name":"n2", "writable":true, "description":"some text", "bitmap":"b2"}} {"return": {}} -exports available: 2 +{"execute":"block-export-add", + "arguments":{"type": "nbd", "node-name":"n", "id":"n3", "name": "n3", + "bitmaps":[{"node":"null","name":"b3"}]}} +{"return": {}} +exports available: 3 export: 'n' size: 4194304 flags: 0x58f ( readonly flush fua df multi cache ) @@ -190,6 +219,15 @@ exports available: 2 available meta contexts: 2 base:allocation qemu:dirty-bitmap:b2 + export: 'n3' + size: 4194304 + flags: 0x58f ( readonly flush fua df multi cache ) + min block: 1 + opt block: 4096 + max block: 33554432 + available meta contexts: 2 + base:allocation + qemu:dirty-bitmap:b3 === Contrast normal status to large granularity dirty-bitmap === @@ -215,6 +253,10 @@ read 2097152/2097152 bytes at offset 2097152 { "start": 1024, "length": 2096128, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, { "start": 2097152, "length": 2097152, "depth": 0, "present": false, "zero": false, "data": false}] +=== Check bitmap taken from another node === + +[{ "start": 0, "length": 4194304, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}] + === End qemu NBD server === {"execute":"nbd-server-remove", @@ -229,6 +271,7 @@ read 2097152/2097152 bytes at offset 2097152 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} {"error": {"class": "GenericError", "desc": "Export 'n2' is not found"}} {"execute":"nbd-server-stop"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n3"}} {"return": {}} {"execute":"nbd-server-stop"} {"error": {"class": "GenericError", "desc": "NBD server not running"}} From 8846b7d1c137ba261b2300b20e94ae360d88a538 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Apr 2022 19:57:48 +0200 Subject: [PATCH 04/12] nbd: safeguard against waking up invalid coroutine The .reply_possible field of s->requests is never set to false. This is not a problem as it is only a safeguard to detect protocol errors, but it's sloppy. In fact, the field is actually not necessary at all, because .coroutine is set to NULL in NBD_FOREACH_REPLY_CHUNK after receiving the last chunk. Thus, replace .reply_possible with .coroutine and move the check before deciding the fate of this request. Signed-off-by: Paolo Bonzini Message-Id: <20220414175756.671165-2-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Lukas Straub Signed-off-by: Eric Blake --- block/nbd.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 567872ac53..1f97160949 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -58,7 +58,6 @@ typedef struct { Coroutine *coroutine; uint64_t offset; /* original offset of the request */ bool receiving; /* sleeping in the yield in nbd_receive_replies */ - bool reply_possible; /* reply header not yet received */ } NBDClientRequest; typedef enum NBDClientState { @@ -454,15 +453,15 @@ static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) nbd_channel_error(s, -EINVAL); return -EINVAL; } + ind2 = HANDLE_TO_INDEX(s, s->reply.handle); + if (ind2 >= MAX_NBD_REQUESTS || !s->requests[ind2].coroutine) { + nbd_channel_error(s, -EINVAL); + return -EINVAL; + } if (s->reply.handle == handle) { /* We are done */ return 0; } - ind2 = HANDLE_TO_INDEX(s, s->reply.handle); - if (ind2 >= MAX_NBD_REQUESTS || !s->requests[ind2].reply_possible) { - nbd_channel_error(s, -EINVAL); - return -EINVAL; - } nbd_recv_coroutine_wake_one(&s->requests[ind2]); } } @@ -505,7 +504,6 @@ static int nbd_co_send_request(BlockDriverState *bs, s->requests[i].coroutine = qemu_coroutine_self(); s->requests[i].offset = request->from; s->requests[i].receiving = false; - s->requests[i].reply_possible = true; request->handle = INDEX_TO_HANDLE(s, i); From 0c43c6fc896ff0894627b9464c3db94a33c8c7ac Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Apr 2022 19:57:49 +0200 Subject: [PATCH 05/12] nbd: mark more coroutine_fns Several coroutine functions in block/nbd.c are not marked as such. This patch adds a few more markers; it is not exhaustive, but it focuses especially on: - places that wake other coroutines, because aio_co_wake() has very different semantics inside a coroutine (queuing after yield vs. entering immediately); - functions with _co_ in their names, to avoid confusion Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini Message-Id: <20220414175756.671165-3-pbonzini@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Lukas Straub Signed-off-by: Eric Blake --- block/nbd.c | 64 ++++++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 1f97160949..3a6e609203 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -132,7 +132,7 @@ static bool nbd_client_connected(BDRVNBDState *s) return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED; } -static bool nbd_recv_coroutine_wake_one(NBDClientRequest *req) +static bool coroutine_fn nbd_recv_coroutine_wake_one(NBDClientRequest *req) { if (req->receiving) { req->receiving = false; @@ -143,7 +143,7 @@ static bool nbd_recv_coroutine_wake_one(NBDClientRequest *req) return false; } -static void nbd_recv_coroutines_wake(BDRVNBDState *s, bool all) +static void coroutine_fn nbd_recv_coroutines_wake(BDRVNBDState *s, bool all) { int i; @@ -154,7 +154,7 @@ static void nbd_recv_coroutines_wake(BDRVNBDState *s, bool all) } } -static void nbd_channel_error(BDRVNBDState *s, int ret) +static void coroutine_fn nbd_channel_error(BDRVNBDState *s, int ret) { if (nbd_client_connected(s)) { qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); @@ -466,9 +466,9 @@ static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) } } -static int nbd_co_send_request(BlockDriverState *bs, - NBDRequest *request, - QEMUIOVector *qiov) +static int coroutine_fn nbd_co_send_request(BlockDriverState *bs, + NBDRequest *request, + QEMUIOVector *qiov) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; int rc, i = -1; @@ -721,9 +721,9 @@ static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk, return 0; } -static int nbd_co_receive_offset_data_payload(BDRVNBDState *s, - uint64_t orig_offset, - QEMUIOVector *qiov, Error **errp) +static int coroutine_fn +nbd_co_receive_offset_data_payload(BDRVNBDState *s, uint64_t orig_offset, + QEMUIOVector *qiov, Error **errp) { QEMUIOVector sub_qiov; uint64_t offset; @@ -1039,8 +1039,8 @@ break_loop: return false; } -static int nbd_co_receive_return_code(BDRVNBDState *s, uint64_t handle, - int *request_ret, Error **errp) +static int coroutine_fn nbd_co_receive_return_code(BDRVNBDState *s, uint64_t handle, + int *request_ret, Error **errp) { NBDReplyChunkIter iter; @@ -1053,9 +1053,9 @@ static int nbd_co_receive_return_code(BDRVNBDState *s, uint64_t handle, return iter.ret; } -static int nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t handle, - uint64_t offset, QEMUIOVector *qiov, - int *request_ret, Error **errp) +static int coroutine_fn nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t handle, + uint64_t offset, QEMUIOVector *qiov, + int *request_ret, Error **errp) { NBDReplyChunkIter iter; NBDReply reply; @@ -1105,10 +1105,10 @@ static int nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t handle, return iter.ret; } -static int nbd_co_receive_blockstatus_reply(BDRVNBDState *s, - uint64_t handle, uint64_t length, - NBDExtent *extent, - int *request_ret, Error **errp) +static int coroutine_fn nbd_co_receive_blockstatus_reply(BDRVNBDState *s, + uint64_t handle, uint64_t length, + NBDExtent *extent, + int *request_ret, Error **errp) { NBDReplyChunkIter iter; NBDReply reply; @@ -1165,8 +1165,8 @@ static int nbd_co_receive_blockstatus_reply(BDRVNBDState *s, return iter.ret; } -static int nbd_co_request(BlockDriverState *bs, NBDRequest *request, - QEMUIOVector *write_qiov) +static int coroutine_fn nbd_co_request(BlockDriverState *bs, NBDRequest *request, + QEMUIOVector *write_qiov) { int ret, request_ret; Error *local_err = NULL; @@ -1202,9 +1202,9 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest *request, return ret ? ret : request_ret; } -static int nbd_client_co_preadv(BlockDriverState *bs, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn nbd_client_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { int ret, request_ret; Error *local_err = NULL; @@ -1261,9 +1261,9 @@ static int nbd_client_co_preadv(BlockDriverState *bs, int64_t offset, return ret ? ret : request_ret; } -static int nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { @@ -1286,8 +1286,8 @@ static int nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset, return nbd_co_request(bs, &request, qiov); } -static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int64_t bytes, BdrvRequestFlags flags) +static int coroutine_fn nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int64_t bytes, BdrvRequestFlags flags) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { @@ -1321,7 +1321,7 @@ static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, return nbd_co_request(bs, &request, NULL); } -static int nbd_client_co_flush(BlockDriverState *bs) +static int coroutine_fn nbd_client_co_flush(BlockDriverState *bs) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { .type = NBD_CMD_FLUSH }; @@ -1336,8 +1336,8 @@ static int nbd_client_co_flush(BlockDriverState *bs) return nbd_co_request(bs, &request, NULL); } -static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, - int64_t bytes) +static int coroutine_fn nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, + int64_t bytes) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { @@ -1913,7 +1913,7 @@ fail: return ret; } -static int nbd_co_flush(BlockDriverState *bs) +static int coroutine_fn nbd_co_flush(BlockDriverState *bs) { return nbd_client_co_flush(bs); } From 172f5f1a4058c361bfcd19317b0e3151556b3edb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Apr 2022 19:57:50 +0200 Subject: [PATCH 06/12] nbd: remove peppering of nbd_client_connected It is unnecessary to check nbd_client_connected() because every time s->state is moved out of NBD_CLIENT_CONNECTED the socket is shut down and all coroutines are resumed. The only case where it was actually needed is when the NBD server disconnects and there is no reconnect-delay. In that case, nbd_receive_replies() does not set s->reply.handle and nbd_co_do_receive_one_chunk() cannot continue. For that one case, check the return value of nbd_receive_replies(). As to the others: * nbd_receive_replies() can put the current coroutine to sleep if another reply is ongoing; then it will be woken by nbd_channel_error(), called by the ongoing reply. Or it can try itself to read a reply header and fail, thus calling nbd_channel_error() itself. * nbd_co_send_request() will write the body of the request and fail * nbd_reply_chunk_iter_receive() will call nbd_co_receive_one_chunk() and then nbd_co_do_receive_one_chunk(), which will handle the failure as above; or it will just detect a previous call to nbd_iter_channel_error() via iter->ret < 0. Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini Message-Id: <20220414175756.671165-4-pbonzini@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Lukas Straub Signed-off-by: Eric Blake --- block/nbd.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 3a6e609203..326546f6cd 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -409,10 +409,6 @@ static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) return 0; } - if (!nbd_client_connected(s)) { - return -EIO; - } - if (s->reply.handle != 0) { /* * Some other request is being handled now. It should already be @@ -512,7 +508,7 @@ static int coroutine_fn nbd_co_send_request(BlockDriverState *bs, if (qiov) { qio_channel_set_cork(s->ioc, true); rc = nbd_send_request(s->ioc, request); - if (nbd_client_connected(s) && rc >= 0) { + if (rc >= 0) { if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov, NULL) < 0) { rc = -EIO; @@ -829,8 +825,8 @@ static coroutine_fn int nbd_co_do_receive_one_chunk( } *request_ret = 0; - nbd_receive_replies(s, handle); - if (!nbd_client_connected(s)) { + ret = nbd_receive_replies(s, handle); + if (ret < 0) { error_setg(errp, "Connection closed"); return -EIO; } @@ -982,11 +978,6 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, NBDReply local_reply; NBDStructuredReplyChunk *chunk; Error *local_err = NULL; - if (!nbd_client_connected(s)) { - error_setg(&local_err, "Connection closed"); - nbd_iter_channel_error(iter, -EIO, &local_err); - goto break_loop; - } if (iter->done) { /* Previous iteration was last. */ @@ -1007,7 +998,7 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, } /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */ - if (nbd_reply_is_simple(reply) || !nbd_client_connected(s)) { + if (nbd_reply_is_simple(reply) || iter->ret < 0) { goto break_loop; } From 8610b4491f02312f3bf0de10826fad7aa99ddfcf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Apr 2022 19:57:51 +0200 Subject: [PATCH 07/12] nbd: keep send_mutex/free_sema handling outside nbd_co_do_establish_connection Elevate s->in_flight early so that other incoming requests will wait on the CoQueue in nbd_co_send_request; restart them after getting back from nbd_reconnect_attempt. This could be after the reconnect timer or nbd_cancel_in_flight have cancelled the attempt, so there is no need anymore to cancel the requests there. nbd_co_send_request now handles both stopping and restarting pending requests after a successful connection, and there is no need to hold send_mutex in nbd_co_do_establish_connection. The current setup is confusing because nbd_co_do_establish_connection is called both with send_mutex taken and without it. Before the patch it uses free_sema which (at least in theory...) is protected by send_mutex, after the patch it does not anymore. Signed-off-by: Paolo Bonzini Message-Id: <20220414175756.671165-5-pbonzini@redhat.com> Reviewed-by: Eric Blake [eblake: wrap long line] Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Lukas Straub Signed-off-by: Eric Blake --- block/coroutines.h | 5 ++-- block/nbd.c | 66 ++++++++++++++++++++++------------------------ 2 files changed, 34 insertions(+), 37 deletions(-) diff --git a/block/coroutines.h b/block/coroutines.h index b293e943c8..8ea70d45f9 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -59,7 +59,8 @@ int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); int coroutine_fn -nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp); +nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking, + Error **errp); int coroutine_fn @@ -109,7 +110,7 @@ bdrv_common_block_status_above(BlockDriverState *bs, BlockDriverState **file, int *depth); int generated_co_wrapper -nbd_do_establish_connection(BlockDriverState *bs, Error **errp); +nbd_do_establish_connection(BlockDriverState *bs, bool blocking, Error **errp); int generated_co_wrapper blk_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, diff --git a/block/nbd.c b/block/nbd.c index 326546f6cd..1e7f609312 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -187,9 +187,6 @@ static void reconnect_delay_timer_cb(void *opaque) if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) { s->state = NBD_CLIENT_CONNECTING_NOWAIT; nbd_co_establish_connection_cancel(s->conn); - while (qemu_co_enter_next(&s->free_sema, NULL)) { - /* Resume all queued requests */ - } } reconnect_delay_timer_del(s); @@ -310,11 +307,10 @@ static int nbd_handle_updated_info(BlockDriverState *bs, Error **errp) } int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, - Error **errp) + bool blocking, Error **errp) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; int ret; - bool blocking = nbd_client_connecting_wait(s); IO_CODE(); assert(!s->ioc); @@ -350,7 +346,6 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, /* successfully connected */ s->state = NBD_CLIENT_CONNECTED; - qemu_co_queue_restart_all(&s->free_sema); return 0; } @@ -358,25 +353,25 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, /* called under s->send_mutex */ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) { - assert(nbd_client_connecting(s)); - assert(s->in_flight == 0); - - if (nbd_client_connecting_wait(s) && s->reconnect_delay && - !s->reconnect_delay_timer) - { - /* - * It's first reconnect attempt after switching to - * NBD_CLIENT_CONNECTING_WAIT - */ - reconnect_delay_timer_init(s, - qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + - s->reconnect_delay * NANOSECONDS_PER_SECOND); - } + bool blocking = nbd_client_connecting_wait(s); /* * Now we are sure that nobody is accessing the channel, and no one will * try until we set the state to CONNECTED. */ + assert(nbd_client_connecting(s)); + assert(s->in_flight == 1); + + if (blocking && !s->reconnect_delay_timer) { + /* + * It's the first reconnect attempt after switching to + * NBD_CLIENT_CONNECTING_WAIT + */ + g_assert(s->reconnect_delay); + reconnect_delay_timer_init(s, + qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + + s->reconnect_delay * NANOSECONDS_PER_SECOND); + } /* Finalize previous connection if any */ if (s->ioc) { @@ -387,7 +382,9 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) s->ioc = NULL; } - nbd_co_do_establish_connection(s->bs, NULL); + qemu_co_mutex_unlock(&s->send_mutex); + nbd_co_do_establish_connection(s->bs, blocking, NULL); + qemu_co_mutex_lock(&s->send_mutex); /* * The reconnect attempt is done (maybe successfully, maybe not), so @@ -472,21 +469,21 @@ static int coroutine_fn nbd_co_send_request(BlockDriverState *bs, qemu_co_mutex_lock(&s->send_mutex); while (s->in_flight == MAX_NBD_REQUESTS || - (!nbd_client_connected(s) && s->in_flight > 0)) - { + (!nbd_client_connected(s) && s->in_flight > 0)) { qemu_co_queue_wait(&s->free_sema, &s->send_mutex); } - if (nbd_client_connecting(s)) { - nbd_reconnect_attempt(s); - } - - if (!nbd_client_connected(s)) { - rc = -EIO; - goto err; - } - s->in_flight++; + if (!nbd_client_connected(s)) { + if (nbd_client_connecting(s)) { + nbd_reconnect_attempt(s); + qemu_co_queue_restart_all(&s->free_sema); + } + if (!nbd_client_connected(s)) { + rc = -EIO; + goto err; + } + } for (i = 0; i < MAX_NBD_REQUESTS; i++) { if (s->requests[i].coroutine == NULL) { @@ -526,8 +523,8 @@ err: nbd_channel_error(s, rc); if (i != -1) { s->requests[i].coroutine = NULL; - s->in_flight--; } + s->in_flight--; qemu_co_queue_next(&s->free_sema); } qemu_co_mutex_unlock(&s->send_mutex); @@ -1882,7 +1879,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, } s->state = NBD_CLIENT_CONNECTING_WAIT; - ret = nbd_do_establish_connection(bs, errp); + ret = nbd_do_establish_connection(bs, true, errp); if (ret < 0) { goto fail; } @@ -2048,7 +2045,6 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) if (s->state == NBD_CLIENT_CONNECTING_WAIT) { s->state = NBD_CLIENT_CONNECTING_NOWAIT; - qemu_co_queue_restart_all(&s->free_sema); } nbd_co_establish_connection_cancel(s->conn); From ee19d953ecff53908a645d38a1b913fdf15a72f4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Apr 2022 19:57:52 +0200 Subject: [PATCH 08/12] nbd: use a QemuMutex to synchronize yanking, reconnection and coroutines The condition for waiting on the s->free_sema queue depends on both s->in_flight and s->state. The latter is currently using atomics, but this is quite dubious and probably wrong. Because s->state is written in the main thread too, for example by the yank callback, it cannot be protected by a CoMutex. Introduce a separate lock that can be used by nbd_co_send_request(); later on this lock will also be used for s->state. There will not be any contention on the lock unless there is a yank or reconnect, so this is not performance sensitive. Signed-off-by: Paolo Bonzini Message-Id: <20220414175756.671165-6-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Lukas Straub Signed-off-by: Eric Blake --- block/nbd.c | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 1e7f609312..cea77becba 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -71,17 +71,22 @@ typedef struct BDRVNBDState { QIOChannel *ioc; /* The current I/O channel */ NBDExportInfo info; - CoMutex send_mutex; + /* + * Protects free_sema, in_flight, requests[].coroutine, + * reconnect_delay_timer. + */ + QemuMutex requests_lock; CoQueue free_sema; - - CoMutex receive_mutex; int in_flight; + NBDClientRequest requests[MAX_NBD_REQUESTS]; + QEMUTimer *reconnect_delay_timer; + + CoMutex send_mutex; + CoMutex receive_mutex; NBDClientState state; - QEMUTimer *reconnect_delay_timer; QEMUTimer *open_timer; - NBDClientRequest requests[MAX_NBD_REQUESTS]; NBDReply reply; BlockDriverState *bs; @@ -350,7 +355,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, return 0; } -/* called under s->send_mutex */ +/* Called with s->requests_lock taken. */ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) { bool blocking = nbd_client_connecting_wait(s); @@ -382,9 +387,9 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) s->ioc = NULL; } - qemu_co_mutex_unlock(&s->send_mutex); + qemu_mutex_unlock(&s->requests_lock); nbd_co_do_establish_connection(s->bs, blocking, NULL); - qemu_co_mutex_lock(&s->send_mutex); + qemu_mutex_lock(&s->requests_lock); /* * The reconnect attempt is done (maybe successfully, maybe not), so @@ -466,11 +471,10 @@ static int coroutine_fn nbd_co_send_request(BlockDriverState *bs, BDRVNBDState *s = (BDRVNBDState *)bs->opaque; int rc, i = -1; - qemu_co_mutex_lock(&s->send_mutex); - + qemu_mutex_lock(&s->requests_lock); while (s->in_flight == MAX_NBD_REQUESTS || (!nbd_client_connected(s) && s->in_flight > 0)) { - qemu_co_queue_wait(&s->free_sema, &s->send_mutex); + qemu_co_queue_wait(&s->free_sema, &s->requests_lock); } s->in_flight++; @@ -491,13 +495,13 @@ static int coroutine_fn nbd_co_send_request(BlockDriverState *bs, } } - g_assert(qemu_in_coroutine()); assert(i < MAX_NBD_REQUESTS); - s->requests[i].coroutine = qemu_coroutine_self(); s->requests[i].offset = request->from; s->requests[i].receiving = false; + qemu_mutex_unlock(&s->requests_lock); + qemu_co_mutex_lock(&s->send_mutex); request->handle = INDEX_TO_HANDLE(s, i); assert(s->ioc); @@ -517,17 +521,19 @@ static int coroutine_fn nbd_co_send_request(BlockDriverState *bs, } else { rc = nbd_send_request(s->ioc, request); } + qemu_co_mutex_unlock(&s->send_mutex); -err: if (rc < 0) { + qemu_mutex_lock(&s->requests_lock); +err: nbd_channel_error(s, rc); if (i != -1) { s->requests[i].coroutine = NULL; } s->in_flight--; qemu_co_queue_next(&s->free_sema); + qemu_mutex_unlock(&s->requests_lock); } - qemu_co_mutex_unlock(&s->send_mutex); return rc; } @@ -1017,12 +1023,11 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, return true; break_loop: + qemu_mutex_lock(&s->requests_lock); s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL; - - qemu_co_mutex_lock(&s->send_mutex); s->in_flight--; qemu_co_queue_next(&s->free_sema); - qemu_co_mutex_unlock(&s->send_mutex); + qemu_mutex_unlock(&s->requests_lock); return false; } @@ -1855,8 +1860,9 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, BDRVNBDState *s = (BDRVNBDState *)bs->opaque; s->bs = bs; - qemu_co_mutex_init(&s->send_mutex); + qemu_mutex_init(&s->requests_lock); qemu_co_queue_init(&s->free_sema); + qemu_co_mutex_init(&s->send_mutex); qemu_co_mutex_init(&s->receive_mutex); if (!yank_register_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name), errp)) { @@ -2043,9 +2049,11 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) reconnect_delay_timer_del(s); + qemu_mutex_lock(&s->requests_lock); if (s->state == NBD_CLIENT_CONNECTING_WAIT) { s->state = NBD_CLIENT_CONNECTING_NOWAIT; } + qemu_mutex_unlock(&s->requests_lock); nbd_co_establish_connection_cancel(s->conn); } From 8d45185cb76fa1dd7c3309940a967dc42d8619d4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Apr 2022 19:57:53 +0200 Subject: [PATCH 09/12] nbd: code motion and function renaming Prepare for the next patch, so that the diff is less confusing. nbd_client_connecting is moved closer to the definition point. nbd_client_connecting_wait() is kept only for the reconnection logic; when it is used to check if a request has to be reissued, use the renamed function nbd_client_will_reconnect(). In the next patch, the two cases will have different locking requirements. Signed-off-by: Paolo Bonzini Message-Id: <20220414175756.671165-7-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Lukas Straub Signed-off-by: Eric Blake --- block/nbd.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index cea77becba..3cba874b1c 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -254,18 +254,15 @@ static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns) timer_mod(s->open_timer, expire_time_ns); } -static bool nbd_client_connecting(BDRVNBDState *s) -{ - NBDClientState state = qatomic_load_acquire(&s->state); - return state == NBD_CLIENT_CONNECTING_WAIT || - state == NBD_CLIENT_CONNECTING_NOWAIT; -} - static bool nbd_client_connecting_wait(BDRVNBDState *s) { return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT; } +static bool nbd_client_will_reconnect(BDRVNBDState *s) +{ + return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT; +} /* * Update @bs with information learned during a completed negotiation process. * Return failure if the server's advertised options are incompatible with the @@ -355,6 +352,13 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, return 0; } +static bool nbd_client_connecting(BDRVNBDState *s) +{ + NBDClientState state = qatomic_load_acquire(&s->state); + return state == NBD_CLIENT_CONNECTING_WAIT || + state == NBD_CLIENT_CONNECTING_NOWAIT; +} + /* Called with s->requests_lock taken. */ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) { @@ -1190,7 +1194,7 @@ static int coroutine_fn nbd_co_request(BlockDriverState *bs, NBDRequest *request error_free(local_err); local_err = NULL; } - } while (ret < 0 && nbd_client_connecting_wait(s)); + } while (ret < 0 && nbd_client_will_reconnect(s)); return ret ? ret : request_ret; } @@ -1249,7 +1253,7 @@ static int coroutine_fn nbd_client_co_preadv(BlockDriverState *bs, int64_t offse error_free(local_err); local_err = NULL; } - } while (ret < 0 && nbd_client_connecting_wait(s)); + } while (ret < 0 && nbd_client_will_reconnect(s)); return ret ? ret : request_ret; } @@ -1407,7 +1411,7 @@ static int coroutine_fn nbd_client_co_block_status( error_free(local_err); local_err = NULL; } - } while (ret < 0 && nbd_client_connecting_wait(s)); + } while (ret < 0 && nbd_client_will_reconnect(s)); if (ret < 0 || request_ret < 0) { return ret ? ret : request_ret; From dba5156c0e9c0362b7c6121f9e1c89bb9be1f227 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Apr 2022 19:57:54 +0200 Subject: [PATCH 10/12] nbd: move s->state under requests_lock Remove the confusing, and most likely wrong, atomics. The only function that used to be somewhat in a hot path was nbd_client_connected(), but it is not anymore after the previous patches. The same logic is used both to check if a request had to be reissued and also in nbd_reconnecting_attempt(). The former cases are outside requests_lock, while nbd_reconnecting_attempt() does have the lock, therefore the two have been separated in the previous commit. nbd_client_will_reconnect() can simply take s->requests_lock, while nbd_reconnecting_attempt() can inline the access now that no complicated atomics are involved. Signed-off-by: Paolo Bonzini Message-Id: <20220414175756.671165-8-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Lukas Straub Signed-off-by: Eric Blake --- block/nbd.c | 78 ++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 3cba874b1c..a5c690cef7 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -35,7 +35,6 @@ #include "qemu/option.h" #include "qemu/cutils.h" #include "qemu/main-loop.h" -#include "qemu/atomic.h" #include "qapi/qapi-visit-sockets.h" #include "qapi/qmp/qstring.h" @@ -72,10 +71,11 @@ typedef struct BDRVNBDState { NBDExportInfo info; /* - * Protects free_sema, in_flight, requests[].coroutine, + * Protects state, free_sema, in_flight, requests[].coroutine, * reconnect_delay_timer. */ QemuMutex requests_lock; + NBDClientState state; CoQueue free_sema; int in_flight; NBDClientRequest requests[MAX_NBD_REQUESTS]; @@ -83,7 +83,6 @@ typedef struct BDRVNBDState { CoMutex send_mutex; CoMutex receive_mutex; - NBDClientState state; QEMUTimer *open_timer; @@ -132,11 +131,6 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs) s->x_dirty_bitmap = NULL; } -static bool nbd_client_connected(BDRVNBDState *s) -{ - return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED; -} - static bool coroutine_fn nbd_recv_coroutine_wake_one(NBDClientRequest *req) { if (req->receiving) { @@ -159,14 +153,15 @@ static void coroutine_fn nbd_recv_coroutines_wake(BDRVNBDState *s, bool all) } } -static void coroutine_fn nbd_channel_error(BDRVNBDState *s, int ret) +/* Called with s->requests_lock held. */ +static void coroutine_fn nbd_channel_error_locked(BDRVNBDState *s, int ret) { - if (nbd_client_connected(s)) { + if (s->state == NBD_CLIENT_CONNECTED) { qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); } if (ret == -EIO) { - if (nbd_client_connected(s)) { + if (s->state == NBD_CLIENT_CONNECTED) { s->state = s->reconnect_delay ? NBD_CLIENT_CONNECTING_WAIT : NBD_CLIENT_CONNECTING_NOWAIT; } @@ -177,6 +172,12 @@ static void coroutine_fn nbd_channel_error(BDRVNBDState *s, int ret) nbd_recv_coroutines_wake(s, true); } +static void coroutine_fn nbd_channel_error(BDRVNBDState *s, int ret) +{ + QEMU_LOCK_GUARD(&s->requests_lock); + nbd_channel_error_locked(s, ret); +} + static void reconnect_delay_timer_del(BDRVNBDState *s) { if (s->reconnect_delay_timer) { @@ -189,20 +190,18 @@ static void reconnect_delay_timer_cb(void *opaque) { BDRVNBDState *s = opaque; - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) { - s->state = NBD_CLIENT_CONNECTING_NOWAIT; - nbd_co_establish_connection_cancel(s->conn); - } - reconnect_delay_timer_del(s); + WITH_QEMU_LOCK_GUARD(&s->requests_lock) { + if (s->state != NBD_CLIENT_CONNECTING_WAIT) { + return; + } + s->state = NBD_CLIENT_CONNECTING_NOWAIT; + } + nbd_co_establish_connection_cancel(s->conn); } static void reconnect_delay_timer_init(BDRVNBDState *s, uint64_t expire_time_ns) { - if (qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTING_WAIT) { - return; - } - assert(!s->reconnect_delay_timer); s->reconnect_delay_timer = aio_timer_new(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME, @@ -225,7 +224,9 @@ static void nbd_teardown_connection(BlockDriverState *bs) s->ioc = NULL; } - s->state = NBD_CLIENT_QUIT; + WITH_QEMU_LOCK_GUARD(&s->requests_lock) { + s->state = NBD_CLIENT_QUIT; + } } static void open_timer_del(BDRVNBDState *s) @@ -254,15 +255,15 @@ static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns) timer_mod(s->open_timer, expire_time_ns); } -static bool nbd_client_connecting_wait(BDRVNBDState *s) -{ - return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT; -} - static bool nbd_client_will_reconnect(BDRVNBDState *s) { - return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT; + /* + * Called only after a socket error, so this is not performance sensitive. + */ + QEMU_LOCK_GUARD(&s->requests_lock); + return s->state == NBD_CLIENT_CONNECTING_WAIT; } + /* * Update @bs with information learned during a completed negotiation process. * Return failure if the server's advertised options are incompatible with the @@ -347,22 +348,24 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, qio_channel_attach_aio_context(s->ioc, bdrv_get_aio_context(bs)); /* successfully connected */ - s->state = NBD_CLIENT_CONNECTED; + WITH_QEMU_LOCK_GUARD(&s->requests_lock) { + s->state = NBD_CLIENT_CONNECTED; + } return 0; } +/* Called with s->requests_lock held. */ static bool nbd_client_connecting(BDRVNBDState *s) { - NBDClientState state = qatomic_load_acquire(&s->state); - return state == NBD_CLIENT_CONNECTING_WAIT || - state == NBD_CLIENT_CONNECTING_NOWAIT; + return s->state == NBD_CLIENT_CONNECTING_WAIT || + s->state == NBD_CLIENT_CONNECTING_NOWAIT; } /* Called with s->requests_lock taken. */ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) { - bool blocking = nbd_client_connecting_wait(s); + bool blocking = s->state == NBD_CLIENT_CONNECTING_WAIT; /* * Now we are sure that nobody is accessing the channel, and no one will @@ -477,17 +480,17 @@ static int coroutine_fn nbd_co_send_request(BlockDriverState *bs, qemu_mutex_lock(&s->requests_lock); while (s->in_flight == MAX_NBD_REQUESTS || - (!nbd_client_connected(s) && s->in_flight > 0)) { + (s->state != NBD_CLIENT_CONNECTED && s->in_flight > 0)) { qemu_co_queue_wait(&s->free_sema, &s->requests_lock); } s->in_flight++; - if (!nbd_client_connected(s)) { + if (s->state != NBD_CLIENT_CONNECTED) { if (nbd_client_connecting(s)) { nbd_reconnect_attempt(s); qemu_co_queue_restart_all(&s->free_sema); } - if (!nbd_client_connected(s)) { + if (s->state != NBD_CLIENT_CONNECTED) { rc = -EIO; goto err; } @@ -530,7 +533,7 @@ static int coroutine_fn nbd_co_send_request(BlockDriverState *bs, if (rc < 0) { qemu_mutex_lock(&s->requests_lock); err: - nbd_channel_error(s, rc); + nbd_channel_error_locked(s, rc); if (i != -1) { s->requests[i].coroutine = NULL; } @@ -1443,8 +1446,9 @@ static void nbd_yank(void *opaque) BlockDriverState *bs = opaque; BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - qatomic_store_release(&s->state, NBD_CLIENT_QUIT); + QEMU_LOCK_GUARD(&s->requests_lock); qio_channel_shutdown(QIO_CHANNEL(s->ioc), QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + s->state = NBD_CLIENT_QUIT; } static void nbd_client_close(BlockDriverState *bs) From a80a9a1c7397382fc4c4e6feaa8242b25cadb519 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Apr 2022 19:57:55 +0200 Subject: [PATCH 11/12] nbd: take receive_mutex when reading requests[].receiving requests[].receiving is set by nbd_receive_replies() under the receive_mutex; Read it under the same mutex as well. Waking up receivers on errors happens after each reply finishes processing, in nbd_co_receive_one_chunk(). If there is no currently-active reply, there are two cases: * either there is no active request at all, in which case no element of request[] can have .receiving = true * or nbd_receive_replies() must be running and owns receive_mutex; in that case it will get back to nbd_co_receive_one_chunk() because the socket has been shutdown, and all waiting coroutines will wake up in turn. Signed-off-by: Paolo Bonzini Message-Id: <20220414175756.671165-9-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Lukas Straub Signed-off-by: Eric Blake --- block/nbd.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index a5c690cef7..3bfcf4d97c 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -131,6 +131,7 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs) s->x_dirty_bitmap = NULL; } +/* Called with s->receive_mutex taken. */ static bool coroutine_fn nbd_recv_coroutine_wake_one(NBDClientRequest *req) { if (req->receiving) { @@ -142,12 +143,13 @@ static bool coroutine_fn nbd_recv_coroutine_wake_one(NBDClientRequest *req) return false; } -static void coroutine_fn nbd_recv_coroutines_wake(BDRVNBDState *s, bool all) +static void coroutine_fn nbd_recv_coroutines_wake(BDRVNBDState *s) { int i; + QEMU_LOCK_GUARD(&s->receive_mutex); for (i = 0; i < MAX_NBD_REQUESTS; i++) { - if (nbd_recv_coroutine_wake_one(&s->requests[i]) && !all) { + if (nbd_recv_coroutine_wake_one(&s->requests[i])) { return; } } @@ -168,8 +170,6 @@ static void coroutine_fn nbd_channel_error_locked(BDRVNBDState *s, int ret) } else { s->state = NBD_CLIENT_QUIT; } - - nbd_recv_coroutines_wake(s, true); } static void coroutine_fn nbd_channel_error(BDRVNBDState *s, int ret) @@ -432,11 +432,10 @@ static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) qemu_coroutine_yield(); /* - * We may be woken for 3 reasons: + * We may be woken for 2 reasons: * 1. From this function, executing in parallel coroutine, when our * handle is received. - * 2. From nbd_channel_error(), when connection is lost. - * 3. From nbd_co_receive_one_chunk(), when previous request is + * 2. From nbd_co_receive_one_chunk(), when previous request is * finished and s->reply.handle set to 0. * Anyway, it's OK to lock the mutex and go to the next iteration. */ @@ -928,7 +927,7 @@ static coroutine_fn int nbd_co_receive_one_chunk( } s->reply.handle = 0; - nbd_recv_coroutines_wake(s, false); + nbd_recv_coroutines_wake(s); return ret; } From 620c5cb5da57dc97f655e6218e7ca9bc896394a2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Apr 2022 19:57:56 +0200 Subject: [PATCH 12/12] nbd: document what is protected by the CoMutexes Signed-off-by: Paolo Bonzini Message-Id: <20220414175756.671165-10-pbonzini@redhat.com> Reviewed-by: Eric Blake Reviewed-by: Lukas Straub Signed-off-by: Eric Blake --- block/nbd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/nbd.c b/block/nbd.c index 3bfcf4d97c..6085ab1d2c 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -81,12 +81,18 @@ typedef struct BDRVNBDState { NBDClientRequest requests[MAX_NBD_REQUESTS]; QEMUTimer *reconnect_delay_timer; + /* Protects sending data on the socket. */ CoMutex send_mutex; + + /* + * Protects receiving reply headers from the socket, as well as the + * fields reply and requests[].receiving + */ CoMutex receive_mutex; + NBDReply reply; QEMUTimer *open_timer; - NBDReply reply; BlockDriverState *bs; /* Connection parameters */