From 871cfc540014bb2292d79ce4348484a3dbb33034 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Mon, 8 May 2023 21:10:52 +0200 Subject: [PATCH 01/10] ram: Add public helper to set colo bitmap The overhead of the mutex in non-multifd mode is negligible, because in that case its just the single thread taking the mutex. This will be used in the next commits to add colo support to multifd. Signed-off-by: Lukas Straub Reviewed-by: Juan Quintela Message-Id: <22d83cb428f37929563155531bfb69fd8953cc61.1683572883.git.lukasstraub2@web.de> Signed-off-by: Juan Quintela --- migration/ram.c | 17 ++++++++++++++--- migration/ram.h | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index f78e9912cd..b5d03f85ab 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3408,6 +3408,18 @@ static ram_addr_t host_page_offset_from_ram_block_offset(RAMBlock *block, return ((uintptr_t)block->host + offset) & (block->page_size - 1); } +void colo_record_bitmap(RAMBlock *block, ram_addr_t *normal, uint32_t pages) +{ + qemu_mutex_lock(&ram_state->bitmap_mutex); + for (int i = 0; i < pages; i++) { + ram_addr_t offset = normal[i]; + ram_state->migration_dirty_pages += !test_and_set_bit( + offset >> TARGET_PAGE_BITS, + block->bmap); + } + qemu_mutex_unlock(&ram_state->bitmap_mutex); +} + static inline void *colo_cache_from_block_offset(RAMBlock *block, ram_addr_t offset, bool record_bitmap) { @@ -3425,9 +3437,8 @@ static inline void *colo_cache_from_block_offset(RAMBlock *block, * It help us to decide which pages in ram cache should be flushed * into VM's RAM later. */ - if (record_bitmap && - !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) { - ram_state->migration_dirty_pages++; + if (record_bitmap) { + colo_record_bitmap(block, &offset, 1); } return block->colo_cache + offset; } diff --git a/migration/ram.h b/migration/ram.h index 6fffbeb5f1..ea1f3c25b5 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -82,6 +82,7 @@ int colo_init_ram_cache(void); void colo_flush_ram_cache(void); void colo_release_ram_cache(void); void colo_incoming_start_dirty_log(void); +void colo_record_bitmap(RAMBlock *block, ram_addr_t *normal, uint32_t pages); /* Background snapshot */ bool ram_write_tracking_available(void); From 9d638407efa44153c5269f04e5010b105f5daac9 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Mon, 8 May 2023 21:10:55 +0200 Subject: [PATCH 02/10] ram: Let colo_flush_ram_cache take the bitmap_mutex This is not required, colo_flush_ram_cache does not run concurrently with the multifd threads since the cache is only flushed after everything has been received. But it makes me more comfortable. This will be used in the next commits to add colo support to multifd. Signed-off-by: Lukas Straub Reviewed-by: Juan Quintela Message-Id: <35cb23ba854151d38a31e3a5c8a1020e4283cb4a.1683572883.git.lukasstraub2@web.de> Signed-off-by: Juan Quintela --- migration/ram.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migration/ram.c b/migration/ram.c index b5d03f85ab..f69d8d42b0 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3814,6 +3814,7 @@ void colo_flush_ram_cache(void) unsigned long offset = 0; memory_global_dirty_log_sync(); + qemu_mutex_lock(&ram_state->bitmap_mutex); WITH_RCU_READ_LOCK_GUARD() { RAMBLOCK_FOREACH_NOT_IGNORED(block) { ramblock_sync_dirty_bitmap(ram_state, block); @@ -3848,6 +3849,7 @@ void colo_flush_ram_cache(void) } } } + qemu_mutex_unlock(&ram_state->bitmap_mutex); trace_colo_flush_ram_cache_end(); } From 5d1d1fcf43ac54993d527612fccf9521c8608ae1 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Mon, 8 May 2023 21:11:07 +0200 Subject: [PATCH 03/10] multifd: Add the ramblock to MultiFDRecvParams This will be used in the next commits to add colo support to multifd. Signed-off-by: Lukas Straub Reviewed-by: Juan Quintela Message-Id: <88135197411df1a71d7832962b39abf60faf0021.1683572883.git.lukasstraub2@web.de> Signed-off-by: Juan Quintela --- migration/multifd.c | 11 +++++------ migration/multifd.h | 2 ++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/migration/multifd.c b/migration/multifd.c index 4e71c19292..5c4298eadf 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -281,7 +281,6 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) { MultiFDPacket_t *packet = p->packet; - RAMBlock *block; int i; packet->magic = be32_to_cpu(packet->magic); @@ -331,21 +330,21 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) /* make sure that ramblock is 0 terminated */ packet->ramblock[255] = 0; - block = qemu_ram_block_by_name(packet->ramblock); - if (!block) { + p->block = qemu_ram_block_by_name(packet->ramblock); + if (!p->block) { error_setg(errp, "multifd: unknown ram block %s", packet->ramblock); return -1; } - p->host = block->host; + p->host = p->block->host; for (i = 0; i < p->normal_num; i++) { uint64_t offset = be64_to_cpu(packet->offset[i]); - if (offset > (block->used_length - p->page_size)) { + if (offset > (p->block->used_length - p->page_size)) { error_setg(errp, "multifd: offset too long %" PRIu64 " (max " RAM_ADDR_FMT ")", - offset, block->used_length); + offset, p->block->used_length); return -1; } p->normal[i] = offset; diff --git a/migration/multifd.h b/migration/multifd.h index 7cfc265148..a835643b48 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -175,6 +175,8 @@ typedef struct { uint32_t next_packet_size; /* packets sent through this channel */ uint64_t num_packets; + /* ramblock */ + RAMBlock *block; /* ramblock host address */ uint8_t *host; /* non zero pages recv through this channel */ From 43c71fe3b846222154a99a02b17818fe54c1edaf Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 28 Apr 2023 22:49:19 +0300 Subject: [PATCH 04/10] block/meson.build: prefer positive condition for replication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Lukas Straub Reviewed-by: Zhang Chen Message-Id: <20230428194928.1426370-2-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- block/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/meson.build b/block/meson.build index 13337bd070..486dda8b85 100644 --- a/block/meson.build +++ b/block/meson.build @@ -92,7 +92,7 @@ block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c') block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit]) block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c')) block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c')) -if not get_option('replication').disabled() +if get_option('replication').allowed() block_ss.add(files('replication.c')) endif block_ss.add(when: libaio, if_true: files('linux-aio.c')) From 4332ffcd7b21f7391fef1d916e1e3cd5b4bdd268 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 28 Apr 2023 22:49:20 +0300 Subject: [PATCH 05/10] colo: make colo_checkpoint_notify static and provide simpler API colo_checkpoint_notify() is mostly used in colo.c. Outside we use it once when x-checkpoint-delay migration parameter is set. So, let's simplify the external API to only that function - notify COLO that parameter was set. This make external API more robust and hides implementation details from external callers. Also this helps us to make COLO module optional in further patch (i.e. we are going to add possibility not build the COLO module). Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Reviewed-by: Peter Xu Reviewed-by: Zhang Chen Message-Id: <20230428194928.1426370-3-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- include/migration/colo.h | 9 ++++++++- migration/colo.c | 29 ++++++++++++++++++----------- migration/options.c | 4 +--- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/include/migration/colo.h b/include/migration/colo.h index 5fbe1a6d5d..7ef315473e 100644 --- a/include/migration/colo.h +++ b/include/migration/colo.h @@ -36,6 +36,13 @@ COLOMode get_colo_mode(void); /* failover */ void colo_do_failover(void); -void colo_checkpoint_notify(void *opaque); +/* + * colo_checkpoint_delay_set + * + * Handles change of x-checkpoint-delay migration parameter, called from + * migrate_params_apply() to notify COLO module about the change. + */ +void colo_checkpoint_delay_set(void); + void colo_shutdown(void); #endif diff --git a/migration/colo.c b/migration/colo.c index 07bfa21fea..c9e0b909b9 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -65,6 +65,24 @@ static bool colo_runstate_is_stopped(void) return runstate_check(RUN_STATE_COLO) || !runstate_is_running(); } +static void colo_checkpoint_notify(void *opaque) +{ + MigrationState *s = opaque; + int64_t next_notify_time; + + qemu_event_set(&s->colo_checkpoint_event); + s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); + next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay(); + timer_mod(s->colo_delay_timer, next_notify_time); +} + +void colo_checkpoint_delay_set(void) +{ + if (migration_in_colo_state()) { + colo_checkpoint_notify(migrate_get_current()); + } +} + static void secondary_vm_do_failover(void) { /* COLO needs enable block-replication */ @@ -644,17 +662,6 @@ out: } } -void colo_checkpoint_notify(void *opaque) -{ - MigrationState *s = opaque; - int64_t next_notify_time; - - qemu_event_set(&s->colo_checkpoint_event); - s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); - next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay(); - timer_mod(s->colo_delay_timer, next_notify_time); -} - void migrate_start_colo_process(MigrationState *s) { qemu_mutex_unlock_iothread(); diff --git a/migration/options.c b/migration/options.c index 2e759cc306..9d92b15b76 100644 --- a/migration/options.c +++ b/migration/options.c @@ -1253,9 +1253,7 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) if (params->has_x_checkpoint_delay) { s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; - if (migration_in_colo_state()) { - colo_checkpoint_notify(s); - } + colo_checkpoint_delay_set(); } if (params->has_block_incremental) { From 51e47cf8600dab5beedd7fa369ffd645868672ec Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 28 Apr 2023 22:49:21 +0300 Subject: [PATCH 06/10] build: move COLO under CONFIG_REPLICATION We don't allow to use x-colo capability when replication is not configured. So, no reason to build COLO when replication is disabled, it's unusable in this case. Note also that the check in migrate_caps_check() is not the only restriction: some functions in migration/colo.c will just abort if called with not defined CONFIG_REPLICATION, for example: migration_iteration_finish() case MIGRATION_STATUS_COLO: migrate_start_colo_process() colo_process_checkpoint() abort() It could probably make sense to have possibility to enable COLO without REPLICATION, but this requires deeper audit of colo & replication code, which may be done later if needed. Signed-off-by: Vladimir Sementsov-Ogievskiy Acked-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Message-Id: <20230428194928.1426370-4-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- hmp-commands.hx | 2 ++ migration/colo.c | 28 ------------------------ migration/meson.build | 6 ++++-- migration/migration-hmp-cmds.c | 2 ++ migration/migration.c | 6 ++++++ qapi/migration.json | 9 +++++--- stubs/colo.c | 39 ++++++++++++++++++++++++++++++++++ stubs/meson.build | 1 + 8 files changed, 60 insertions(+), 33 deletions(-) create mode 100644 stubs/colo.c diff --git a/hmp-commands.hx b/hmp-commands.hx index 9afbb54a51..2cbd0f77a0 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1052,6 +1052,7 @@ SRST migration (or once already in postcopy). ERST +#ifdef CONFIG_REPLICATION { .name = "x_colo_lost_heartbeat", .args_type = "", @@ -1060,6 +1061,7 @@ ERST "a failover or takeover is needed.", .cmd = hmp_x_colo_lost_heartbeat, }, +#endif SRST ``x_colo_lost_heartbeat`` diff --git a/migration/colo.c b/migration/colo.c index c9e0b909b9..6c7c313956 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -26,9 +26,7 @@ #include "qemu/rcu.h" #include "migration/failover.h" #include "migration/ram.h" -#ifdef CONFIG_REPLICATION #include "block/replication.h" -#endif #include "net/colo-compare.h" #include "net/colo.h" #include "block/block.h" @@ -86,7 +84,6 @@ void colo_checkpoint_delay_set(void) static void secondary_vm_do_failover(void) { /* COLO needs enable block-replication */ -#ifdef CONFIG_REPLICATION int old_state; MigrationIncomingState *mis = migration_incoming_get_current(); Error *local_err = NULL; @@ -151,14 +148,10 @@ static void secondary_vm_do_failover(void) if (mis->migration_incoming_co) { qemu_coroutine_enter(mis->migration_incoming_co); } -#else - abort(); -#endif } static void primary_vm_do_failover(void) { -#ifdef CONFIG_REPLICATION MigrationState *s = migrate_get_current(); int old_state; Error *local_err = NULL; @@ -199,9 +192,6 @@ static void primary_vm_do_failover(void) /* Notify COLO thread that failover work is finished */ qemu_sem_post(&s->colo_exit_sem); -#else - abort(); -#endif } COLOMode get_colo_mode(void) @@ -235,7 +225,6 @@ void colo_do_failover(void) } } -#ifdef CONFIG_REPLICATION void qmp_xen_set_replication(bool enable, bool primary, bool has_failover, bool failover, Error **errp) @@ -289,7 +278,6 @@ void qmp_xen_colo_do_checkpoint(Error **errp) /* Notify all filters of all NIC to do checkpoint */ colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp); } -#endif COLOStatus *qmp_query_colo_status(Error **errp) { @@ -453,15 +441,11 @@ static int colo_do_checkpoint_transaction(MigrationState *s, } qemu_mutex_lock_iothread(); -#ifdef CONFIG_REPLICATION replication_do_checkpoint_all(&local_err); if (local_err) { qemu_mutex_unlock_iothread(); goto out; } -#else - abort(); -#endif colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); if (local_err) { @@ -579,15 +563,11 @@ static void colo_process_checkpoint(MigrationState *s) object_unref(OBJECT(bioc)); qemu_mutex_lock_iothread(); -#ifdef CONFIG_REPLICATION replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); if (local_err) { qemu_mutex_unlock_iothread(); goto out; } -#else - abort(); -#endif vm_start(); qemu_mutex_unlock_iothread(); @@ -755,7 +735,6 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, return; } -#ifdef CONFIG_REPLICATION replication_get_error_all(&local_err); if (local_err) { error_propagate(errp, local_err); @@ -772,9 +751,6 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, qemu_mutex_unlock_iothread(); return; } -#else - abort(); -#endif /* Notify all filters of all NIC to do checkpoint */ colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err); @@ -881,15 +857,11 @@ void *colo_process_incoming_thread(void *opaque) object_unref(OBJECT(bioc)); qemu_mutex_lock_iothread(); -#ifdef CONFIG_REPLICATION replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); if (local_err) { qemu_mutex_unlock_iothread(); goto out; } -#else - abort(); -#endif vm_start(); qemu_mutex_unlock_iothread(); trace_colo_vm_state_change("stop", "run"); diff --git a/migration/meson.build b/migration/meson.build index 75de868bb7..eb41b77db9 100644 --- a/migration/meson.build +++ b/migration/meson.build @@ -13,8 +13,6 @@ softmmu_ss.add(files( 'block-dirty-bitmap.c', 'channel.c', 'channel-block.c', - 'colo-failover.c', - 'colo.c', 'exec.c', 'fd.c', 'global_state.c', @@ -33,6 +31,10 @@ softmmu_ss.add(files( 'threadinfo.c', ), gnutls) +if get_option('replication').allowed() + softmmu_ss.add(files('colo-failover.c', 'colo.c')) +endif + softmmu_ss.add(when: rdma, if_true: files('rdma.c')) if get_option('live_block_migration').allowed() softmmu_ss.add(files('block.c')) diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index 4e9f00e7dc..9885d7c9f7 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -643,6 +643,7 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, err); } +#ifdef CONFIG_REPLICATION void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict) { Error *err = NULL; @@ -650,6 +651,7 @@ void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict) qmp_x_colo_lost_heartbeat(&err); hmp_handle_error(mon, err); } +#endif typedef struct HMPMigrationStatus { QEMUTimer *timer; diff --git a/migration/migration.c b/migration/migration.c index f9f12a17b5..61b316245d 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -392,6 +392,12 @@ void migration_incoming_disable_colo(void) int migration_incoming_enable_colo(void) { +#ifndef CONFIG_REPLICATION + error_report("ENABLE_COLO command come in migration stream, but COLO " + "module is not built in"); + return -ENOTSUP; +#endif + if (ram_block_discard_disable(true)) { error_report("COLO: cannot disable RAM discard"); return -EBUSY; diff --git a/qapi/migration.json b/qapi/migration.json index 952d3e2c9a..179af0c4d8 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1366,7 +1366,8 @@ # <- { "return": {} } ## { 'command': 'x-colo-lost-heartbeat', - 'features': [ 'unstable' ] } + 'features': [ 'unstable' ], + 'if': 'CONFIG_REPLICATION' } ## # @migrate_cancel: @@ -1638,7 +1639,8 @@ ## { 'struct': 'COLOStatus', 'data': { 'mode': 'COLOMode', 'last-mode': 'COLOMode', - 'reason': 'COLOExitReason' } } + 'reason': 'COLOExitReason' }, + 'if': 'CONFIG_REPLICATION' } ## # @query-colo-status: @@ -1655,7 +1657,8 @@ # Since: 3.1 ## { 'command': 'query-colo-status', - 'returns': 'COLOStatus' } + 'returns': 'COLOStatus', + 'if': 'CONFIG_REPLICATION' } ## # @migrate-recover: diff --git a/stubs/colo.c b/stubs/colo.c new file mode 100644 index 0000000000..cf9816d368 --- /dev/null +++ b/stubs/colo.c @@ -0,0 +1,39 @@ +#include "qemu/osdep.h" +#include "qemu/notify.h" +#include "net/colo-compare.h" +#include "migration/colo.h" +#include "migration/migration.h" +#include "qemu/error-report.h" +#include "qapi/qapi-commands-migration.h" + +void colo_shutdown(void) +{ +} + +void *colo_process_incoming_thread(void *opaque) +{ + error_report("Impossible happend: trying to start COLO thread when COLO " + "module is not built in"); + abort(); +} + +void colo_checkpoint_delay_set(void) +{ +} + +void migrate_start_colo_process(MigrationState *s) +{ + error_report("Impossible happend: trying to start COLO when COLO " + "module is not built in"); + abort(); +} + +bool migration_in_colo_state(void) +{ + return false; +} + +bool migration_incoming_in_colo_state(void) +{ + return false; +} diff --git a/stubs/meson.build b/stubs/meson.build index b2b5956d97..8412cad15f 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -45,6 +45,7 @@ stub_ss.add(files('target-get-monitor-def.c')) stub_ss.add(files('target-monitor-defs.c')) stub_ss.add(files('trace-control.c')) stub_ss.add(files('uuid.c')) +stub_ss.add(files('colo.c')) stub_ss.add(files('vmstate.c')) stub_ss.add(files('vm-stop.c')) stub_ss.add(files('win32-kbd-hook.c')) From 1d4cfcd4091bc6567ba759881f98726c808f7490 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 28 Apr 2023 22:49:23 +0300 Subject: [PATCH 07/10] migration: drop colo_incoming_thread from MigrationIncomingState have_colo_incoming_thread variable is unused. colo_incoming_thread can be local. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Reviewed-by: Peter Xu Reviewed-by: Zhang Chen Message-Id: <20230428194928.1426370-6-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- migration/migration.c | 7 ++++--- migration/migration.h | 2 -- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 61b316245d..c7f628caa6 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -544,6 +544,8 @@ process_incoming_migration_co(void *opaque) /* we get COLO info, and know if we are in COLO mode */ if (!ret && migration_incoming_colo_enabled()) { + QemuThread colo_incoming_thread; + /* Make sure all file formats throw away their mutable metadata */ bdrv_activate_all(&local_err); if (local_err) { @@ -551,14 +553,13 @@ process_incoming_migration_co(void *opaque) goto fail; } - qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", + qemu_thread_create(&colo_incoming_thread, "COLO incoming", colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); - mis->have_colo_incoming_thread = true; qemu_coroutine_yield(); qemu_mutex_unlock_iothread(); /* Wait checkpoint incoming thread exit before free resource */ - qemu_thread_join(&mis->colo_incoming_thread); + qemu_thread_join(&colo_incoming_thread); qemu_mutex_lock_iothread(); /* We hold the global iothread lock, so it is safe here */ colo_release_ram_cache(); diff --git a/migration/migration.h b/migration/migration.h index 3a918514e7..7721c7658b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -162,8 +162,6 @@ struct MigrationIncomingState { int state; - bool have_colo_incoming_thread; - QemuThread colo_incoming_thread; /* The coroutine we should enter (back) after failover */ Coroutine *migration_incoming_co; QemuSemaphore colo_incoming_sem; From ecbfec6d7769b5362feac92404d564622198bf85 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 28 Apr 2023 22:49:24 +0300 Subject: [PATCH 08/10] migration: process_incoming_migration_co: simplify code flow around ret Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Reviewed-by: Peter Xu Reviewed-by: Zhang Chen Message-Id: <20230428194928.1426370-7-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- migration/migration.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index c7f628caa6..140b2a4de6 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -542,8 +542,13 @@ process_incoming_migration_co(void *opaque) /* Else if something went wrong then just fall out of the normal exit */ } + if (ret < 0) { + error_report("load of migration failed: %s", strerror(-ret)); + goto fail; + } + /* we get COLO info, and know if we are in COLO mode */ - if (!ret && migration_incoming_colo_enabled()) { + if (migration_incoming_colo_enabled()) { QemuThread colo_incoming_thread; /* Make sure all file formats throw away their mutable metadata */ @@ -565,10 +570,6 @@ process_incoming_migration_co(void *opaque) colo_release_ram_cache(); } - if (ret < 0) { - error_report("load of migration failed: %s", strerror(-ret)); - goto fail; - } mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); qemu_bh_schedule(mis->bh); mis->migration_incoming_co = NULL; From d70178a88fe8d0873508f6d4757018092262e9ec Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 28 Apr 2023 22:49:27 +0300 Subject: [PATCH 09/10] migration: disallow change capabilities in COLO state COLO is not listed as running state in migrate_is_running(), so, it's theoretically possible to disable colo capability in COLO state and the unexpected error in migration_iteration_finish() is reachable. Let's disallow that in qmp_migrate_set_capabilities. Than the error becomes absolutely unreachable: we can get into COLO state only with enabled capability and can't disable it while we are in COLO state. So substitute the error by simple assertion. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Reviewed-by: Peter Xu Message-Id: <20230428194928.1426370-10-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- migration/migration.c | 5 +---- migration/options.c | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 140b2a4de6..bb254e4f07 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2785,10 +2785,7 @@ static void migration_iteration_finish(MigrationState *s) runstate_set(RUN_STATE_POSTMIGRATE); break; case MIGRATION_STATUS_COLO: - if (!migrate_colo()) { - error_report("%s: critical error: calling COLO code without " - "COLO enabled", __func__); - } + assert(migrate_colo()); migrate_start_colo_process(s); s->vm_was_running = true; /* Fallthrough */ diff --git a/migration/options.c b/migration/options.c index 9d92b15b76..7ed88b7b32 100644 --- a/migration/options.c +++ b/migration/options.c @@ -598,7 +598,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, MigrationCapabilityStatusList *cap; bool new_caps[MIGRATION_CAPABILITY__MAX]; - if (migration_is_running(s->state)) { + if (migration_is_running(s->state) || migration_in_colo_state()) { error_setg(errp, QERR_MIGRATION_ACTIVE); return; } From 121ccedc2bf0c124e93991275336415d12d2e3df Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 28 Apr 2023 22:49:28 +0300 Subject: [PATCH 10/10] migration: block incoming colo when capability is disabled We generally require same set of capabilities on source and target. Let's require x-colo capability to use COLO on target. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Juan Quintela Reviewed-by: Peter Xu Reviewed-by: Lukas Straub Reviewed-by: Zhang Chen Message-Id: <20230428194928.1426370-11-vsementsov@yandex-team.ru> Signed-off-by: Juan Quintela --- docs/COLO-FT.txt | 1 + migration/migration.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt index 8ec653f81c..2e760a4aee 100644 --- a/docs/COLO-FT.txt +++ b/docs/COLO-FT.txt @@ -210,6 +210,7 @@ children.0=childs0 \ 3. On Secondary VM's QEMU monitor, issue command {"execute":"qmp_capabilities"} +{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "x-colo", "state": true } ] } } {"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": {"host": "0.0.0.0", "port": "9999"} } } } {"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable": true } } diff --git a/migration/migration.c b/migration/migration.c index bb254e4f07..439e8651df 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -398,6 +398,12 @@ int migration_incoming_enable_colo(void) return -ENOTSUP; #endif + if (!migrate_colo()) { + error_report("ENABLE_COLO command come in migration stream, but c-colo " + "capability is not set"); + return -EINVAL; + } + if (ram_block_discard_disable(true)) { error_report("COLO: cannot disable RAM discard"); return -EBUSY;