From d069bcca6c8cfcc1f2b056cb794ad726ab7c5f16 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 13 Jan 2020 18:41:02 +0100 Subject: [PATCH 01/29] multifd: Initialize local variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fill everything with zero, so the padding fields are also initialized. Signed-off-by: Juan Quintela Reviewed-by: Daniel P. Berrangé --- migration/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 96feb4062c..b9147bcca3 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -703,7 +703,7 @@ typedef struct { static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp) { - MultiFDInit_t msg; + MultiFDInit_t msg = {}; int ret; msg.magic = cpu_to_be32(MULTIFD_MAGIC); From b99784ef6c3beff2c43ea8d3bc50cdcd7903658f Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 23 Oct 2017 14:50:16 +0200 Subject: [PATCH 02/29] migration-test: Add migration multifd test We set multifd-channels. Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Thomas Huth Tested-by: Wei Yang Signed-off-by: Juan Quintela --- tests/qtest/migration-test.c | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 341d190922..81f65602de 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -1202,6 +1202,61 @@ static void test_migrate_auto_converge(void) test_migrate_end(from, to, true); } +static void test_multifd_tcp(void) +{ + MigrateStart *args = migrate_start_new(); + QTestState *from, *to; + QDict *rsp; + char *uri; + + if (test_migrate_start(&from, &to, "defer", args)) { + return; + } + + /* + * We want to pick a speed slow enough that the test completes + * quickly, but that it doesn't complete precopy even on a slow + * machine, so also set the downtime. + */ + /* 1 ms should make it not converge*/ + migrate_set_parameter_int(from, "downtime-limit", 1); + /* 1GB/s */ + migrate_set_parameter_int(from, "max-bandwidth", 1000000000); + + migrate_set_parameter_int(from, "multifd-channels", 16); + migrate_set_parameter_int(to, "multifd-channels", 16); + + migrate_set_capability(from, "multifd", "true"); + migrate_set_capability(to, "multifd", "true"); + + /* Start incoming migration from the 1st socket */ + rsp = wait_command(to, "{ 'execute': 'migrate-incoming'," + " 'arguments': { 'uri': 'tcp:127.0.0.1:0' }}"); + qobject_unref(rsp); + + /* Wait for the first serial output from the source */ + wait_for_serial("src_serial"); + + uri = migrate_get_socket_address(to, "socket-address"); + + migrate_qmp(from, uri, "{}"); + + wait_for_migration_pass(from); + + /* 300ms it should converge */ + migrate_set_parameter_int(from, "downtime-limit", 300); + + if (!got_stop) { + qtest_qmp_eventwait(from, "STOP"); + } + qtest_qmp_eventwait(to, "RESUME"); + + wait_for_serial("dest_serial"); + wait_for_migration_complete(from); + test_migrate_end(from, to, true); + free(uri); +} + int main(int argc, char **argv) { char template[] = "/tmp/migration-test-XXXXXX"; @@ -1266,6 +1321,7 @@ int main(int argc, char **argv) test_validate_uuid_dst_not_set); qtest_add_func("/migration/auto_converge", test_migrate_auto_converge); + qtest_add_func("/migration/multifd/tcp", test_multifd_tcp); ret = g_test_run(); From 4d65a6216bfc44891ac298b74a6921d479805131 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 18 Dec 2019 05:36:22 +0100 Subject: [PATCH 03/29] migration: Make sure that we don't call write() in case of error If we are exiting due to an error/finish/.... Just don't try to even touch the channel with one IO operation. Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela --- migration/ram.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/migration/ram.c b/migration/ram.c index b9147bcca3..f946282adb 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -900,6 +900,12 @@ struct { uint64_t packet_num; /* send channels ready */ QemuSemaphore channels_ready; + /* + * Have we already run terminate threads. There is a race when it + * happens that we got one error while we are exiting. + * We will use atomic operations. Only valid values are 0 and 1. + */ + int exiting; } *multifd_send_state; /* @@ -928,6 +934,10 @@ static int multifd_send_pages(RAMState *rs) MultiFDPages_t *pages = multifd_send_state->pages; uint64_t transferred; + if (atomic_read(&multifd_send_state->exiting)) { + return -1; + } + qemu_sem_wait(&multifd_send_state->channels_ready); for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { p = &multifd_send_state->params[i]; @@ -1009,6 +1019,16 @@ static void multifd_send_terminate_threads(Error *err) } } + /* + * We don't want to exit each threads twice. Depending on where + * we get the error, or if there are two independent errors in two + * threads at the same time, we can end calling this function + * twice. + */ + if (atomic_xchg(&multifd_send_state->exiting, 1)) { + return; + } + for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; @@ -1118,6 +1138,10 @@ static void *multifd_send_thread(void *opaque) while (true) { qemu_sem_wait(&p->sem); + + if (atomic_read(&multifd_send_state->exiting)) { + break; + } qemu_mutex_lock(&p->mutex); if (p->pending_job) { @@ -1224,6 +1248,7 @@ int multifd_save_setup(void) multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); multifd_send_state->pages = multifd_pages_init(page_count); qemu_sem_init(&multifd_send_state->channels_ready, 0); + atomic_set(&multifd_send_state->exiting, 0); for (i = 0; i < thread_count; i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; From 6a22c5444c7cf79aaa47d1bc9485cf5dfe90429b Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Wed, 3 Apr 2019 11:00:04 +0200 Subject: [PATCH 04/29] migration-test: introduce functions to handle string parameters Signed-off-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela --- tests/qtest/migration-test.c | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 81f65602de..26e2e77289 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -356,6 +356,43 @@ static void migrate_set_parameter_int(QTestState *who, const char *parameter, migrate_check_parameter_int(who, parameter, value); } +static char *migrate_get_parameter_str(QTestState *who, + const char *parameter) +{ + QDict *rsp; + char *result; + + rsp = wait_command(who, "{ 'execute': 'query-migrate-parameters' }"); + result = g_strdup(qdict_get_str(rsp, parameter)); + qobject_unref(rsp); + return result; +} + +static void migrate_check_parameter_str(QTestState *who, const char *parameter, + const char *value) +{ + char *result; + + result = migrate_get_parameter_str(who, parameter); + g_assert_cmpstr(result, ==, value); + g_free(result); +} + +__attribute__((unused)) +static void migrate_set_parameter_str(QTestState *who, const char *parameter, + const char *value) +{ + QDict *rsp; + + rsp = qtest_qmp(who, + "{ 'execute': 'migrate-set-parameters'," + "'arguments': { %s: %s } }", + parameter, value); + g_assert(qdict_haskey(rsp, "return")); + qobject_unref(rsp); + migrate_check_parameter_str(who, parameter, value); +} + static void migrate_pause(QTestState *who) { QDict *rsp; From ddad81bd28de665475a87693a93e6cf5d6fd8bab Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 29 Nov 2019 12:51:32 +0100 Subject: [PATCH 05/29] runstate: ignore finishmigrate -> prelaunch transition Commit 1bd71dce4bf2 tries to prevent a finishmigrate -> prelaunch transition by exiting at the beginning of the main_loop_should_exit() function if the state is already finishmigrate. As the finishmigrate state is set in the migration thread it can happen concurrently to the function. The migration thread and the function are normally protected by the iothread mutex and thus the state should no evolve between the start of the function and its end. Unfortunately during the function life the lock is released by pause_all_vcpus() just before the point we need to be sure we are not in finishmigrate state and if the migration thread is waiting for the lock it will take the opportunity to change the state to finishmigrate. The only way to be sure we are not in the finishmigrate state when we need is to check the state after the pause_all_vcpus() function. Fixes: 1bd71dce4bf2 ("runstate: ignore exit request in finish migrate state") Signed-off-by: Laurent Vivier Signed-off-by: Juan Quintela --- vl.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vl.c b/vl.c index 751401214c..71d3e7eefb 100644 --- a/vl.c +++ b/vl.c @@ -1604,9 +1604,6 @@ static bool main_loop_should_exit(void) RunState r; ShutdownCause request; - if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { - return false; - } if (preconfig_exit_requested) { if (runstate_check(RUN_STATE_PRECONFIG)) { runstate_set(RUN_STATE_PRELAUNCH); @@ -1635,8 +1632,13 @@ static bool main_loop_should_exit(void) pause_all_vcpus(); qemu_system_reset(request); resume_all_vcpus(); + /* + * runstate can change in pause_all_vcpus() + * as iothread mutex is unlocked + */ if (!runstate_check(RUN_STATE_RUNNING) && - !runstate_check(RUN_STATE_INMIGRATE)) { + !runstate_check(RUN_STATE_INMIGRATE) && + !runstate_check(RUN_STATE_FINISH_MIGRATE)) { runstate_set(RUN_STATE_PRELAUNCH); } } From 03acb4e94deada2dd32412725420db82682f2e2e Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Mon, 6 Jan 2020 15:23:31 -0300 Subject: [PATCH 06/29] ram.c: remove unneeded labels ram_save_queue_pages() has an 'err' label that can be replaced by 'return -1' instead. Same thing with ram_discard_range(), and in this case we can also get rid of the 'ret' variable and return either '-1' on error or the result of ram_block_discard_range(). CC: Juan Quintela CC: Dr. David Alan Gilbert Signed-off-by: Daniel Henrique Barboza Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index f946282adb..17cd8d524b 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2459,7 +2459,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) * it's the 1st request. */ error_report("ram_save_queue_pages no previous block"); - goto err; + return -1; } } else { ramblock = qemu_ram_block_by_name(rbname); @@ -2467,7 +2467,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) if (!ramblock) { /* We shouldn't be asked for a non-existent RAMBlock */ error_report("ram_save_queue_pages no block '%s'", rbname); - goto err; + return -1; } rs->last_req_rb = ramblock; } @@ -2476,7 +2476,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) error_report("%s request overrun start=" RAM_ADDR_FMT " len=" RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, __func__, start, len, ramblock->used_length); - goto err; + return -1; } struct RAMSrcPageRequest *new_entry = @@ -2492,9 +2492,6 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) qemu_mutex_unlock(&rs->src_page_req_mutex); return 0; - -err: - return -1; } static bool save_page_use_compression(RAMState *rs) @@ -3097,8 +3094,6 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) */ int ram_discard_range(const char *rbname, uint64_t start, size_t length) { - int ret = -1; - trace_ram_discard_range(rbname, start, length); RCU_READ_LOCK_GUARD(); @@ -3106,7 +3101,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) if (!rb) { error_report("ram_discard_range: Failed to find block '%s'", rbname); - goto err; + return -1; } /* @@ -3118,10 +3113,7 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) length >> qemu_target_page_bits()); } - ret = ram_block_discard_range(rb, start, length); - -err: - return ret; + return ram_block_discard_range(rb, start, length); } /* From 97e1e06780e70f6e98a0d2df881e0c0927d3aeb6 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 5 Dec 2019 10:29:18 +0000 Subject: [PATCH 07/29] migration: Rate limit inside host pages When using hugepages, rate limiting is necessary within each huge page, since a 1G huge page can take a significant time to send, so you end up with bursty behaviour. Fixes: 4c011c37ecb3 ("postcopy: Send whole huge pages") Reported-by: Lin Ma Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Reviewed-by: Peter Xu Signed-off-by: Juan Quintela --- migration/migration.c | 57 ++++++++++++++++++++++++------------------ migration/migration.h | 1 + migration/ram.c | 2 ++ migration/trace-events | 4 +-- 4 files changed, 37 insertions(+), 27 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 354ad072fa..27500d09a9 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3224,6 +3224,37 @@ void migration_consume_urgent_request(void) qemu_sem_wait(&migrate_get_current()->rate_limit_sem); } +/* Returns true if the rate limiting was broken by an urgent request */ +bool migration_rate_limit(void) +{ + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + MigrationState *s = migrate_get_current(); + + bool urgent = false; + migration_update_counters(s, now); + if (qemu_file_rate_limit(s->to_dst_file)) { + /* + * Wait for a delay to do rate limiting OR + * something urgent to post the semaphore. + */ + int ms = s->iteration_start_time + BUFFER_DELAY - now; + trace_migration_rate_limit_pre(ms); + if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { + /* + * We were woken by one or more urgent things but + * the timedwait will have consumed one of them. + * The service routine for the urgent wake will dec + * the semaphore itself for each item it consumes, + * so add this one we just eat back. + */ + qemu_sem_post(&s->rate_limit_sem); + urgent = true; + } + trace_migration_rate_limit_post(urgent); + } + return urgent; +} + /* * Master migration thread on the source VM. * It drives the migration and pumps the data down the outgoing channel. @@ -3290,8 +3321,6 @@ static void *migration_thread(void *opaque) trace_migration_thread_setup_complete(); while (migration_is_active(s)) { - int64_t current_time; - if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { MigIterateState iter_state = migration_iteration_run(s); if (iter_state == MIG_ITERATE_SKIP) { @@ -3318,29 +3347,7 @@ static void *migration_thread(void *opaque) update_iteration_initial_status(s); } - current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - - migration_update_counters(s, current_time); - - urgent = false; - if (qemu_file_rate_limit(s->to_dst_file)) { - /* Wait for a delay to do rate limiting OR - * something urgent to post the semaphore. - */ - int ms = s->iteration_start_time + BUFFER_DELAY - current_time; - trace_migration_thread_ratelimit_pre(ms); - if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { - /* We were worken by one or more urgent things but - * the timedwait will have consumed one of them. - * The service routine for the urgent wake will dec - * the semaphore itself for each item it consumes, - * so add this one we just eat back. - */ - qemu_sem_post(&s->rate_limit_sem); - urgent = true; - } - trace_migration_thread_ratelimit_post(urgent); - } + urgent = migration_rate_limit(); } trace_migration_thread_after_loop(); diff --git a/migration/migration.h b/migration/migration.h index 79b3dda146..aa9ff6f27b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -341,5 +341,6 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); void migration_make_urgent_request(void); void migration_consume_urgent_request(void); +bool migration_rate_limit(void); #endif diff --git a/migration/ram.c b/migration/ram.c index 17cd8d524b..1ec5c10561 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2639,6 +2639,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, pages += tmppages; pss->page++; + /* Allow rate limiting to happen in the middle of huge pages */ + migration_rate_limit(); } while ((pss->page & (pagesize_bits - 1)) && offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); diff --git a/migration/trace-events b/migration/trace-events index 6dee7b5389..2f9129e213 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -138,12 +138,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 migration_completion_file_err(void) "" migration_completion_postcopy_end(void) "" migration_completion_postcopy_end_after_complete(void) "" +migration_rate_limit_pre(int ms) "%d ms" +migration_rate_limit_post(int urgent) "urgent: %d" migration_return_path_end_before(void) "" migration_return_path_end_after(int rp_error) "%d" migration_thread_after_loop(void) "" migration_thread_file_err(void) "" -migration_thread_ratelimit_pre(int ms) "%d ms" -migration_thread_ratelimit_post(int urgent) "urgent: %d" migration_thread_setup_complete(void) "" open_return_path_on_source(void) "" open_return_path_on_source_continue(void) "" From 2667c98722ddec12c9f43902c6c5668023e1dce9 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 22 Nov 2019 09:00:38 +0100 Subject: [PATCH 08/29] migration: Fix incorrect integer->float conversion caught by clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang does not like qmp_migrate_set_downtime()'s code to clamp double @value to 0..INT64_MAX: qemu/migration/migration.c:2038:24: error: implicit conversion from 'long' to 'double' changes value from 9223372036854775807 to 9223372036854775808 [-Werror,-Wimplicit-int-float-conversion] The warning will be enabled by default in clang 10. It is not available for clang <= 9. The clamp is actually useless; @value is checked to be within 0..MAX_MIGRATE_DOWNTIME_SECONDS immediately before. Delete it. While there, make the conversion from double to int64_t explicit. Signed-off-by: Fangrui Song Reviewed-by: Markus Armbruster Reviewed-by: Juan Quintela Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé [Patch split, commit message improved] Signed-off-by: Markus Armbruster Signed-off-by: Juan Quintela --- migration/migration.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 27500d09a9..f79d0bf89a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2035,11 +2035,10 @@ void qmp_migrate_set_downtime(double value, Error **errp) } value *= 1000; /* Convert to milliseconds */ - value = MAX(0, MIN(INT64_MAX, value)); MigrateSetParameters p = { .has_downtime_limit = true, - .downtime_limit = value, + .downtime_limit = (int64_t)value, }; qmp_migrate_set_parameters(&p, errp); From 603d5a42d3cca58d69dbe9728a47767896486cae Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Wed, 13 Nov 2019 20:53:25 +0300 Subject: [PATCH 09/29] migration: Fix the re-run check of the migrate-incoming command The current check sets an error but doesn't fail the command. This may cause a problem if new connection attempt by the same URI affects the first connection. Signed-off-by: Yury Kotov Reviewed-by: Juan Quintela Reviewed-by: Darren Kenny Signed-off-by: Juan Quintela --- migration/migration.c | 1 + 1 file changed, 1 insertion(+) diff --git a/migration/migration.c b/migration/migration.c index f79d0bf89a..e55edee606 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1784,6 +1784,7 @@ void qmp_migrate_incoming(const char *uri, Error **errp) } if (!once) { error_setg(errp, "The incoming migration has already been started"); + return; } qemu_start_incoming_migration(uri, &local_err); From 619bd31d1f7cc5878e75e989a78d6f1c79356d4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 3 Jan 2020 11:39:58 +0400 Subject: [PATCH 10/29] misc: use QEMU_IS_ALIGNED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marc-André Lureau Reviewed-by: Juan Quintela Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefan Berger Reviewed-by: Paolo Bonzini Signed-off-by: Juan Quintela --- exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exec.c b/exec.c index 0f6b087f57..67e520d18e 100644 --- a/exec.c +++ b/exec.c @@ -3895,7 +3895,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) uint8_t *host_startaddr = rb->host + start; - if ((uintptr_t)host_startaddr & (rb->page_size - 1)) { + if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) { error_report("ram_block_discard_range: Unaligned start address: %p", host_startaddr); goto err; @@ -3903,7 +3903,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) if ((start + length) <= rb->used_length) { bool need_madvise, need_fallocate; - if (length & (rb->page_size - 1)) { + if (!QEMU_IS_ALIGNED(length, rb->page_size)) { error_report("ram_block_discard_range: Unaligned length: %zx", length); goto err; From bd5de61e7b926b5bb9dae254630bf3c8be575bc0 Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Thu, 17 Oct 2019 15:59:52 -0500 Subject: [PATCH 11/29] migration: add savevm_state_handler_remove() Create a function to abstract common logic needed when removing a SaveStateEntry element from the savevm_state.handlers queue. For now we just remove the element. Soon it will involve additional cleanup. Signed-off-by: Scott Cheloha Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/savevm.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 59efc1981d..30d980caa2 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -725,6 +725,11 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) } } +static void savevm_state_handler_remove(SaveStateEntry *se) +{ + QTAILQ_REMOVE(&savevm_state.handlers, se, entry); +} + /* TODO: Individual devices generally have very little idea about the rest of the system, so instance_id should be removed/replaced. Meanwhile pass -1 as instance_id if you do not already have a clearly @@ -777,7 +782,7 @@ void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque) QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) { if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) { - QTAILQ_REMOVE(&savevm_state.handlers, se, entry); + savevm_state_handler_remove(se); g_free(se->compat); g_free(se); } @@ -841,7 +846,7 @@ void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd, QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) { if (se->vmsd == vmsd && se->opaque == opaque) { - QTAILQ_REMOVE(&savevm_state.handlers, se, entry); + savevm_state_handler_remove(se); g_free(se->compat); g_free(se); } From 174723ffe586e453f8ed4010ea07bbf79805b63f Mon Sep 17 00:00:00 2001 From: Scott Cheloha Date: Thu, 17 Oct 2019 15:59:53 -0500 Subject: [PATCH 12/29] migration: savevm_state_handler_insert: constant-time element insertion savevm_state's SaveStateEntry TAILQ is a priority queue. Priority sorting is maintained by searching from head to tail for a suitable insertion spot. Insertion is thus an O(n) operation. If we instead keep track of the head of each priority's subqueue within that larger queue we can reduce this operation to O(1) time. savevm_state_handler_remove() becomes slightly more complex to accomodate these gains: we need to replace the head of a priority's subqueue when removing it. With O(1) insertion, booting VMs with many SaveStateEntry objects is more plausible. For example, a ppc64 VM with maxmem=8T has 40000 such objects to insert. Signed-off-by: Scott Cheloha Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/savevm.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 30d980caa2..e57686bca7 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -250,6 +250,7 @@ typedef struct SaveStateEntry { typedef struct SaveState { QTAILQ_HEAD(, SaveStateEntry) handlers; + SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1]; int global_section_id; uint32_t len; const char *name; @@ -261,6 +262,7 @@ typedef struct SaveState { static SaveState savevm_state = { .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers), + .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL }, .global_section_id = 0, }; @@ -709,24 +711,42 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) { MigrationPriority priority = save_state_priority(nse); SaveStateEntry *se; + int i; assert(priority <= MIG_PRI_MAX); - QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { - if (save_state_priority(se) < priority) { + for (i = priority - 1; i >= 0; i--) { + se = savevm_state.handler_pri_head[i]; + if (se != NULL) { + assert(save_state_priority(se) < priority); break; } } - if (se) { + if (i >= 0) { QTAILQ_INSERT_BEFORE(se, nse, entry); } else { QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry); } + + if (savevm_state.handler_pri_head[priority] == NULL) { + savevm_state.handler_pri_head[priority] = nse; + } } static void savevm_state_handler_remove(SaveStateEntry *se) { + SaveStateEntry *next; + MigrationPriority priority = save_state_priority(se); + + if (se == savevm_state.handler_pri_head[priority]) { + next = QTAILQ_NEXT(se, entry); + if (next != NULL && save_state_priority(next) == priority) { + savevm_state.handler_pri_head[priority] = next; + } else { + savevm_state.handler_pri_head[priority] = NULL; + } + } QTAILQ_REMOVE(&savevm_state.handlers, se, entry); } From e65cec5e5d97927d22b39167d3e8edeffc771788 Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Mon, 25 Nov 2019 16:36:32 +0300 Subject: [PATCH 13/29] migration/ram: Yield periodically to the main loop Usually, incoming migration coroutine yields to the main loop while its IO-channel is waiting for data to receive. But there is a case when RAM migration and data receive have the same speed: VM with huge zeroed RAM. In this case, IO-channel won't read and thus the main loop is stuck and for instance, it doesn't respond to QMP commands. For this case, yield periodically, but not too often, so as not to affect the speed of migration. Signed-off-by: Yury Kotov Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 1ec5c10561..5cd066467c 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4246,7 +4246,7 @@ static void colo_flush_ram_cache(void) */ static int ram_load_precopy(QEMUFile *f) { - int flags = 0, ret = 0, invalid_flags = 0, len = 0; + int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0; /* ADVISE is earlier, it shows the source has the postcopy capability on */ bool postcopy_advised = postcopy_is_advised(); if (!migrate_use_compression()) { @@ -4258,6 +4258,17 @@ static int ram_load_precopy(QEMUFile *f) void *host = NULL; uint8_t ch; + /* + * Yield periodically to let main loop run, but an iteration of + * the main loop is expensive, so do it each some iterations + */ + if ((i & 32767) == 0 && qemu_in_coroutine()) { + aio_co_schedule(qemu_get_current_aio_context(), + qemu_coroutine_self()); + qemu_coroutine_yield(); + } + i++; + addr = qemu_get_be64(f); flags = addr & ~TARGET_PAGE_MASK; addr &= TARGET_PAGE_MASK; From 2e36bc1b884a6fc6bc626b13db4960773ad976f7 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 7 Nov 2019 20:39:02 +0800 Subject: [PATCH 14/29] migration/postcopy: reduce memset when it is zero page and matches_target_page_size In this case, page_buffer content would not be used. Skip this to save some time. Signed-off-by: Wei Yang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 5cd066467c..bdb0316892 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4126,7 +4126,13 @@ static int ram_load_postcopy(QEMUFile *f) switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { case RAM_SAVE_FLAG_ZERO: ch = qemu_get_byte(f); - memset(page_buffer, ch, TARGET_PAGE_SIZE); + /* + * Can skip to set page_buffer when + * this is a zero page and (block->page_size == TARGET_PAGE_SIZE). + */ + if (ch || !matches_target_page_size) { + memset(page_buffer, ch, TARGET_PAGE_SIZE); + } if (ch) { all_zero = false; } From ca1a6b708ba2d94f2dd64bb8ec04223a708c9b40 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 7 Nov 2019 20:39:03 +0800 Subject: [PATCH 15/29] migration/postcopy: wait for decompress thread in precopy Compress is not supported with postcopy, it is safe to wait for decompress thread just in precopy. This is a preparation for later patch. Signed-off-by: Wei Yang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index bdb0316892..c13b44b4d9 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4421,6 +4421,7 @@ static int ram_load_precopy(QEMUFile *f) } } + ret |= wait_for_decompress_done(); return ret; } @@ -4452,8 +4453,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } else { ret = ram_load_precopy(f); } - - ret |= wait_for_decompress_done(); } trace_ram_load_complete(ret, seq_iter); From 4cbb3c63c165ae71b71f764030c7b16d98679391 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 7 Nov 2019 20:39:04 +0800 Subject: [PATCH 16/29] migration/postcopy: count target page number to decide the place_needed In postcopy, it requires to place whole host page instead of target page. Currently, it relies on the page offset to decide whether this is the last target page. We also can count the target page number during the iteration. When the number of target page equals (host page size / target page size), this means it is the last target page in the host page. This is a preparation for non-ordered target page transmission. Signed-off-by: Wei Yang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index c13b44b4d9..8ebaea255e 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4052,6 +4052,7 @@ static int ram_load_postcopy(QEMUFile *f) void *postcopy_host_page = mis->postcopy_tmp_page; void *last_host = NULL; bool all_zero = false; + int target_pages = 0; while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr; @@ -4086,6 +4087,7 @@ static int ram_load_postcopy(QEMUFile *f) ret = -EINVAL; break; } + target_pages++; matches_target_page_size = block->page_size == TARGET_PAGE_SIZE; /* * Postcopy requires that we place whole host pages atomically; @@ -4117,8 +4119,10 @@ static int ram_load_postcopy(QEMUFile *f) * If it's the last part of a host page then we place the host * page */ - place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & - (block->page_size - 1)) == 0; + if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) { + place_needed = true; + target_pages = 0; + } place_source = postcopy_host_page; } last_host = host; From e5e73b0f90d0ac956782378b92897455e8104a7e Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 7 Nov 2019 20:39:05 +0800 Subject: [PATCH 17/29] migration/postcopy: set all_zero to true on the first target page For the first target page, all_zero is set to true for this round check. After target_pages introduced, we could leverage this variable instead of checking the address offset. Signed-off-by: Wei Yang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 8ebaea255e..460abfa2c3 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4102,7 +4102,7 @@ static int ram_load_postcopy(QEMUFile *f) page_buffer = postcopy_host_page + ((uintptr_t)host & (block->page_size - 1)); /* If all TP are zero then we can optimise the place */ - if (!((uintptr_t)host & (block->page_size - 1))) { + if (target_pages == 1) { all_zero = true; } else { /* not the 1st TP within the HP */ From 91ba442f5c26eaf45e2f78fc7e40fe6a7fdf7b9d Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 7 Nov 2019 20:39:06 +0800 Subject: [PATCH 18/29] migration/postcopy: enable random order target page arrival After using number of target page received to track one host page, we could have the capability to handle random order target page arrival in one host page. This is a preparation for enabling compress during postcopy. Signed-off-by: Wei Yang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 460abfa2c3..a7414170e5 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4050,7 +4050,7 @@ static int ram_load_postcopy(QEMUFile *f) MigrationIncomingState *mis = migration_incoming_get_current(); /* Temporary page that is later 'placed' */ void *postcopy_host_page = mis->postcopy_tmp_page; - void *last_host = NULL; + void *this_host = NULL; bool all_zero = false; int target_pages = 0; @@ -4097,24 +4097,26 @@ static int ram_load_postcopy(QEMUFile *f) * that's moved into place later. * The migration protocol uses, possibly smaller, target-pages * however the source ensures it always sends all the components - * of a host page in order. + * of a host page in one chunk. */ page_buffer = postcopy_host_page + ((uintptr_t)host & (block->page_size - 1)); /* If all TP are zero then we can optimise the place */ if (target_pages == 1) { all_zero = true; + this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host, + block->page_size); } else { /* not the 1st TP within the HP */ - if (host != (last_host + TARGET_PAGE_SIZE)) { - error_report("Non-sequential target page %p/%p", - host, last_host); + if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) != + (uintptr_t)this_host) { + error_report("Non-same host page %p/%p", + host, this_host); ret = -EINVAL; break; } } - /* * If it's the last part of a host page then we place the host * page @@ -4125,7 +4127,6 @@ static int ram_load_postcopy(QEMUFile *f) } place_source = postcopy_host_page; } - last_host = host; switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { case RAM_SAVE_FLAG_ZERO: @@ -4178,7 +4179,8 @@ static int ram_load_postcopy(QEMUFile *f) if (!ret && place_needed) { /* This gets called at the last target page in the host page */ - void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; + void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host, + block->page_size); if (all_zero) { ret = postcopy_place_page_zero(mis, place_dest, From 644acf99b8cb8437f65600cf00c2e090bf3e3bc2 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 7 Nov 2019 20:39:07 +0800 Subject: [PATCH 19/29] migration/postcopy: enable compress during postcopy postcopy requires to place a whole host page, while migration thread migrate memory in target page size. This makes postcopy need to collect all target pages in one host page before placing via userfaultfd. To enable compress during postcopy, there are two problems to solve: 1. Random order for target page arrival 2. Target pages in one host page arrives without interrupt by target page from other host page The first one is handled by previous cleanup patch. This patch handles the second one by: 1. Flush compress thread for each host page 2. Wait for decompress thread for before placing host page Signed-off-by: Wei Yang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 11 ----------- migration/ram.c | 28 +++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index e55edee606..990bff00c0 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1005,17 +1005,6 @@ static bool migrate_caps_check(bool *cap_list, #endif if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { - if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { - /* The decompression threads asynchronously write into RAM - * rather than use the atomic copies needed to avoid - * userfaulting. It should be possible to fix the decompression - * threads for compatibility in future. - */ - error_setg(errp, "Postcopy is not currently compatible " - "with compression"); - return false; - } - /* This check is reasonably expensive, so only when it's being * set the first time, also it's only the destination that needs * special support. diff --git a/migration/ram.c b/migration/ram.c index a7414170e5..5f20c3d15d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3469,6 +3469,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) rs->target_page_count += pages; + /* + * During postcopy, it is necessary to make sure one whole host + * page is sent in one chunk. + */ + if (migrate_postcopy_ram()) { + flush_compressed_data(rs); + } + /* * we want to check in the 1st loop, just in case it was the 1st * time and we had to sync the dirty bitmap. @@ -4061,6 +4069,7 @@ static int ram_load_postcopy(QEMUFile *f) void *place_source = NULL; RAMBlock *block = NULL; uint8_t ch; + int len; addr = qemu_get_be64(f); @@ -4078,7 +4087,8 @@ static int ram_load_postcopy(QEMUFile *f) trace_ram_load_postcopy_loop((uint64_t)addr, flags); place_needed = false; - if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) { + if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | + RAM_SAVE_FLAG_COMPRESS_PAGE)) { block = ram_block_from_stream(f, flags); host = host_from_ram_block_offset(block, addr); @@ -4161,6 +4171,17 @@ static int ram_load_postcopy(QEMUFile *f) TARGET_PAGE_SIZE); } break; + case RAM_SAVE_FLAG_COMPRESS_PAGE: + all_zero = false; + len = qemu_get_be32(f); + if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { + error_report("Invalid compressed data length: %d", len); + ret = -EINVAL; + break; + } + decompress_data_with_multi_threads(f, page_buffer, len); + break; + case RAM_SAVE_FLAG_EOS: /* normal exit */ multifd_recv_sync_main(); @@ -4172,6 +4193,11 @@ static int ram_load_postcopy(QEMUFile *f) break; } + /* Got the whole host page, wait for decompress before placing. */ + if (place_needed) { + ret |= wait_for_decompress_done(); + } + /* Detect for any possible file errors */ if (!ret && qemu_file_get_error(f)) { ret = qemu_file_get_error(f); From eab54aa78ffd9fb7895b20fc2761ee998479489b Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 26 Oct 2019 07:19:59 +0800 Subject: [PATCH 20/29] migration/multifd: clean pages after filling packet This is a preparation for the next patch: not use multifd during postcopy. Without enabling postcopy, everything looks good. While after enabling postcopy, migration may fail even not use multifd during postcopy. The reason is the pages is not properly cleared and *old* target page will continue to be transferred. After clean pages, migration succeeds. Signed-off-by: Wei Yang Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 5f20c3d15d..a05448c0c9 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -955,10 +955,10 @@ static int multifd_send_pages(RAMState *rs) } qemu_mutex_unlock(&p->mutex); } - p->pages->used = 0; + assert(!p->pages->used); + assert(!p->pages->block); p->packet_num = multifd_send_state->packet_num++; - p->pages->block = NULL; multifd_send_state->pages = p->pages; p->pages = pages; transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; @@ -1154,6 +1154,8 @@ static void *multifd_send_thread(void *opaque) p->flags = 0; p->num_packets++; p->num_pages += used; + p->pages->used = 0; + p->pages->block = NULL; qemu_mutex_unlock(&p->mutex); trace_multifd_send(p->id, packet_num, used, flags, From c6b3a2e0c4342d171b35970921bff709c007dd62 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 26 Oct 2019 07:20:00 +0800 Subject: [PATCH 21/29] migration/multifd: not use multifd during postcopy We don't support multifd during postcopy, but user still could enable both multifd and postcopy. This leads to migration failure. Skip multifd during postcopy. Signed-off-by: Wei Yang Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index a05448c0c9..d4f33a4f2f 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2587,10 +2587,13 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, } /* - * do not use multifd for compression as the first page in the new - * block should be posted out before sending the compressed page + * Do not use multifd for: + * 1. Compression as the first page in the new block should be posted out + * before sending the compressed page + * 2. In postcopy as one whole host page should be placed */ - if (!save_page_use_compression(rs) && migrate_use_multifd()) { + if (!save_page_use_compression(rs) && migrate_use_multifd() + && !migration_in_postcopy()) { return ram_save_multifd_page(rs, block, offset); } From f76e32eb05041ab001184ab16afb56524adccd0c Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Wed, 23 Oct 2019 12:30:02 +0800 Subject: [PATCH 22/29] migration/multifd: fix nullptr access in terminating multifd threads One multifd channel will shutdown all the other multifd's IOChannel when it fails to receive an IOChannel. In this senario, if some multifds had not received its IOChannel yet, it would try to shutdown its IOChannel which could cause nullptr access at qio_channel_shutdown. Here is the coredump stack: #0 object_get_class (obj=obj@entry=0x0) at qom/object.c:908 #1 0x00005563fdbb8f4a in qio_channel_shutdown (ioc=0x0, how=QIO_CHANNEL_SHUTDOWN_BOTH, errp=0x0) at io/channel.c:355 #2 0x00005563fd7b4c5f in multifd_recv_terminate_threads (err=) at migration/ram.c:1280 #3 0x00005563fd7bc019 in multifd_recv_new_channel (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce00) at migration/ram.c:1478 #4 0x00005563fda82177 in migration_ioc_process_incoming (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce30) at migration/migration.c:605 #5 0x00005563fda8567d in migration_channel_process_incoming (ioc=0x556400255610) at migration/channel.c:44 #6 0x00005563fda83ee0 in socket_accept_incoming_migration (listener=0x5563fff6b920, cioc=0x556400255610, opaque=) at migration/socket.c:166 #7 0x00005563fdbc25cd in qio_net_listener_channel_func (ioc=, condition=, opaque=) at io/net-listener.c:54 #8 0x00007f895b6fe9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 #9 0x00005563fdc18136 in glib_pollfds_poll () at util/main-loop.c:218 #10 0x00005563fdc181b5 in os_host_main_loop_wait (timeout=1000000000) at util/main-loop.c:241 #11 0x00005563fdc183a2 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:517 #12 0x00005563fd8edb37 in main_loop () at vl.c:1791 #13 0x00005563fd74fd45 in main (argc=, argv=, envp=) at vl.c:4473 To fix it up, let's check p->c before calling qio_channel_shutdown. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index d4f33a4f2f..278b2ff87a 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1308,7 +1308,9 @@ static void multifd_recv_terminate_threads(Error *err) - normal quit, i.e. everything went fine, just finished - error quit: We close the channels so the channel threads finish the qio_channel_read_all_eof() */ - qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + if (p->c) { + qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } qemu_mutex_unlock(&p->mutex); } } From 9560a48ecc0c20d87bc458a6db77fba651605819 Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Wed, 23 Oct 2019 11:47:37 +0800 Subject: [PATCH 23/29] migration/multifd: fix destroyed mutex access in terminating multifd threads One multifd will lock all the other multifds' IOChannel mutex to inform them to quit by setting p->quit or shutting down p->c. In this senario, if some multifds had already been terminated and multifd_load_cleanup/multifd_save_cleanup had destroyed their mutex, it could cause destroyed mutex access when trying lock their mutex. Here is the coredump stack: #0 0x00007f81a2794437 in raise () from /usr/lib64/libc.so.6 #1 0x00007f81a2795b28 in abort () from /usr/lib64/libc.so.6 #2 0x00007f81a278d1b6 in __assert_fail_base () from /usr/lib64/libc.so.6 #3 0x00007f81a278d262 in __assert_fail () from /usr/lib64/libc.so.6 #4 0x000055eb1bfadbd3 in qemu_mutex_lock_impl (mutex=0x55eb1e2d1988, file=, line=) at util/qemu-thread-posix.c:64 #5 0x000055eb1bb4564a in multifd_send_terminate_threads (err=) at migration/ram.c:1015 #6 0x000055eb1bb4bb7f in multifd_send_thread (opaque=0x55eb1e2d19f8) at migration/ram.c:1171 #7 0x000055eb1bfad628 in qemu_thread_start (args=0x55eb1e170450) at util/qemu-thread-posix.c:502 #8 0x00007f81a2b36df5 in start_thread () from /usr/lib64/libpthread.so.0 #9 0x00007f81a286048d in clone () from /usr/lib64/libc.so.6 To fix it up, let's destroy the mutex after all the other multifd threads had been terminated. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/ram.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/migration/ram.c b/migration/ram.c index 278b2ff87a..7221f54139 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1053,6 +1053,10 @@ void multifd_save_cleanup(void) if (p->running) { qemu_thread_join(&p->thread); } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + socket_send_channel_destroy(p->c); p->c = NULL; qemu_mutex_destroy(&p->mutex); @@ -1336,6 +1340,10 @@ int multifd_load_cleanup(Error **errp) qemu_sem_post(&p->sem_sync); qemu_thread_join(&p->thread); } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + object_unref(OBJECT(p->c)); p->c = NULL; qemu_mutex_destroy(&p->mutex); From 8bba004ccabd0d71e8b5ca056e576d2737c17305 Mon Sep 17 00:00:00 2001 From: Alexey Romko Date: Fri, 10 Jan 2020 14:51:34 +0100 Subject: [PATCH 24/29] Bug #1829242 correction. Added type conversions to ram_addr_t before all left shifts of page indexes to TARGET_PAGE_BITS, to correct overflows when the page address was 4Gb and more. Signed-off-by: Alexey Romko Reviewed-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela --- migration/ram.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 7221f54139..d0940387d1 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1768,7 +1768,7 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) { uint8_t shift = rb->clear_bmap_shift; hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift); - hwaddr start = (page << TARGET_PAGE_BITS) & (-size); + hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size); /* * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this @@ -2005,7 +2005,7 @@ static void ram_release_pages(const char *rbname, uint64_t offset, int pages) return; } - ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS); + ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS); } /* @@ -2093,7 +2093,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) uint8_t *p; bool send_async = true; RAMBlock *block = pss->block; - ram_addr_t offset = pss->page << TARGET_PAGE_BITS; + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; ram_addr_t current_addr = block->offset + offset; p = block->host + offset; @@ -2280,7 +2280,8 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) *again = false; return false; } - if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) { + if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS) + >= pss->block->used_length) { /* Didn't find anything in this RAM Block */ pss->page = 0; pss->block = QLIST_NEXT_RCU(pss->block, next); @@ -2571,7 +2572,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) { RAMBlock *block = pss->block; - ram_addr_t offset = pss->page << TARGET_PAGE_BITS; + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; int res; if (control_save_page(rs, block, offset, &res)) { @@ -2657,7 +2658,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, /* Allow rate limiting to happen in the middle of huge pages */ migration_rate_limit(); } while ((pss->page & (pagesize_bits - 1)) && - offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); + offset_in_ramblock(pss->block, + ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)); /* The offset we leave with is the last one we looked at */ pss->page--; @@ -2874,8 +2876,10 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms) while (run_start < range) { unsigned long run_end = find_next_bit(bitmap, range, run_start + 1); - ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS, - (run_end - run_start) << TARGET_PAGE_BITS); + ram_discard_range(block->idstr, + ((ram_addr_t)run_start) << TARGET_PAGE_BITS, + ((ram_addr_t)(run_end - run_start)) + << TARGET_PAGE_BITS); run_start = find_next_zero_bit(bitmap, range, run_end + 1); } } @@ -4273,13 +4277,16 @@ static void colo_flush_ram_cache(void) while (block) { offset = migration_bitmap_find_dirty(ram_state, block, offset); - if (offset << TARGET_PAGE_BITS >= block->used_length) { + if (((ram_addr_t)offset) << TARGET_PAGE_BITS + >= block->used_length) { offset = 0; block = QLIST_NEXT_RCU(block, next); } else { migration_bitmap_clear_dirty(ram_state, block, offset); - dst_host = block->host + (offset << TARGET_PAGE_BITS); - src_host = block->colo_cache + (offset << TARGET_PAGE_BITS); + dst_host = block->host + + (((ram_addr_t)offset) << TARGET_PAGE_BITS); + src_host = block->colo_cache + + (((ram_addr_t)offset) << TARGET_PAGE_BITS); memcpy(dst_host, src_host, TARGET_PAGE_SIZE); } } From 1df2c9a26fcb2fa32d099f8e9adcdae4207872e3 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 16 Oct 2019 10:29:30 +0800 Subject: [PATCH 25/29] migration: Define VMSTATE_INSTANCE_ID_ANY Define the new macro VMSTATE_INSTANCE_ID_ANY for callers who wants to auto-generate the vmstate instance ID. Previously it was hard coded as -1 instead of this macro. It helps to change this default value in the follow up patches. No functional change. Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- backends/dbus-vmstate.c | 3 ++- hw/arm/stellaris.c | 2 +- hw/core/qdev.c | 3 ++- hw/display/ads7846.c | 2 +- hw/i2c/core.c | 2 +- hw/input/stellaris_input.c | 3 ++- hw/intc/apic_common.c | 2 +- hw/misc/max111x.c | 3 ++- hw/net/eepro100.c | 3 ++- hw/pci/pci.c | 2 +- hw/ppc/spapr.c | 2 +- hw/timer/arm_timer.c | 2 +- hw/tpm/tpm_emulator.c | 3 ++- include/migration/vmstate.h | 2 ++ migration/savevm.c | 8 ++++---- 15 files changed, 25 insertions(+), 17 deletions(-) diff --git a/backends/dbus-vmstate.c b/backends/dbus-vmstate.c index 56b482a7d6..cc594a722e 100644 --- a/backends/dbus-vmstate.c +++ b/backends/dbus-vmstate.c @@ -412,7 +412,8 @@ dbus_vmstate_complete(UserCreatable *uc, Error **errp) return; } - if (vmstate_register(VMSTATE_IF(self), -1, &dbus_vmstate, self) < 0) { + if (vmstate_register(VMSTATE_IF(self), VMSTATE_INSTANCE_ID_ANY, + &dbus_vmstate, self) < 0) { error_setg(errp, "Failed to register vmstate"); } } diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index b198066b54..bb025e0bd0 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -708,7 +708,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq, memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000); memory_region_add_subregion(get_system_memory(), base, &s->iomem); ssys_reset(s); - vmstate_register(NULL, -1, &vmstate_stellaris_sys, s); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s); return 0; } diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 9f1753f5cf..58e87d336d 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -879,7 +879,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp) if (qdev_get_vmsd(dev)) { if (vmstate_register_with_alias_id(VMSTATE_IF(dev), - -1, qdev_get_vmsd(dev), dev, + VMSTATE_INSTANCE_ID_ANY, + qdev_get_vmsd(dev), dev, dev->instance_id_alias, dev->alias_required_for_version, &local_err) < 0) { diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c index c12272ae72..9228b40b1a 100644 --- a/hw/display/ads7846.c +++ b/hw/display/ads7846.c @@ -154,7 +154,7 @@ static void ads7846_realize(SSISlave *d, Error **errp) ads7846_int_update(s); - vmstate_register(NULL, -1, &vmstate_ads7846, s); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); } static void ads7846_class_init(ObjectClass *klass, void *data) diff --git a/hw/i2c/core.c b/hw/i2c/core.c index 92cd489069..d770035ba0 100644 --- a/hw/i2c/core.c +++ b/hw/i2c/core.c @@ -61,7 +61,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); QLIST_INIT(&bus->current_devs); - vmstate_register(NULL, -1, &vmstate_i2c_bus, bus); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); return bus; } diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c index 59892b07fc..e6ee5e11f1 100644 --- a/hw/input/stellaris_input.c +++ b/hw/input/stellaris_input.c @@ -88,5 +88,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) } s->num_buttons = n; qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); - vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, + &vmstate_stellaris_gamepad, s); } diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 375cb6abe9..f2c3a7f309 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -284,7 +284,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) } if (s->legacy_instance_id) { - instance_id = -1; + instance_id = VMSTATE_INSTANCE_ID_ANY; } vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, s, -1, 0, NULL); diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c index 211008ce02..2b87bdee5b 100644 --- a/hw/misc/max111x.c +++ b/hw/misc/max111x.c @@ -146,7 +146,8 @@ static int max111x_init(SSISlave *d, int inputs) s->input[7] = 0x80; s->com = 0; - vmstate_register(VMSTATE_IF(dev), -1, &vmstate_max111x, s); + vmstate_register(VMSTATE_IF(dev), VMSTATE_INSTANCE_ID_ANY, + &vmstate_max111x, s); return 0; } diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c index cc71a7a036..6cc97769d9 100644 --- a/hw/net/eepro100.c +++ b/hw/net/eepro100.c @@ -1874,7 +1874,8 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); s->vmstate->name = qemu_get_queue(s->nic)->model; - vmstate_register(VMSTATE_IF(&pci_dev->qdev), -1, s->vmstate, s); + vmstate_register(VMSTATE_IF(&pci_dev->qdev), VMSTATE_INSTANCE_ID_ANY, + s->vmstate, s); } static void eepro100_instance_init(Object *obj) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index e3d310365d..3ac7961451 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -122,7 +122,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) bus->machine_done.notify = pcibus_machine_done; qemu_add_machine_init_done_notifier(&bus->machine_done); - vmstate_register(NULL, -1, &vmstate_pcibus, bus); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); } static void pcie_bus_realize(BusState *qbus, Error **errp) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 30a5fbd3be..02cf53fc5b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2948,7 +2948,7 @@ static void spapr_machine_init(MachineState *machine) * interface, this is a legacy from the sPAPREnvironment structure * which predated MachineState but had a similar function */ vmstate_register(NULL, 0, &vmstate_spapr, spapr); - register_savevm_live("spapr/htab", -1, 1, + register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, &savevm_htab_handlers, spapr); qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c index af524fabf7..beaa285685 100644 --- a/hw/timer/arm_timer.c +++ b/hw/timer/arm_timer.c @@ -180,7 +180,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) s->control = TIMER_CTRL_IE; s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_DEFAULT); - vmstate_register(NULL, -1, &vmstate_arm_timer, s); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); return s; } diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c index 10d587ed40..3a0fc442f3 100644 --- a/hw/tpm/tpm_emulator.c +++ b/hw/tpm/tpm_emulator.c @@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj) tpm_emu->cur_locty_number = ~0; qemu_mutex_init(&tpm_emu->mutex); - vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, + &vmstate_tpm_emulator, obj); } /* diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 4aef72c426..ed74dd5624 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -1157,6 +1157,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); +#define VMSTATE_INSTANCE_ID_ANY -1 + /* Returns: 0 on success, -1 on failure */ int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id, const VMStateDescription *vmsd, diff --git a/migration/savevm.c b/migration/savevm.c index e57686bca7..8dab99efc4 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -775,7 +775,7 @@ int register_savevm_live(const char *idstr, pstrcat(se->idstr, sizeof(se->idstr), idstr); - if (instance_id == -1) { + if (instance_id == VMSTATE_INSTANCE_ID_ANY) { se->instance_id = calculate_new_instance_id(se->idstr); } else { se->instance_id = instance_id; @@ -842,14 +842,14 @@ int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id, se->compat = g_new0(CompatEntry, 1); pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); - se->compat->instance_id = instance_id == -1 ? + se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? calculate_compat_instance_id(vmsd->name) : instance_id; - instance_id = -1; + instance_id = VMSTATE_INSTANCE_ID_ANY; } } pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); - if (instance_id == -1) { + if (instance_id == VMSTATE_INSTANCE_ID_ANY) { se->instance_id = calculate_new_instance_id(se->idstr); } else { se->instance_id = instance_id; From 93062e23619e057743757ee53bf7f8e07f7a3710 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 16 Oct 2019 10:29:31 +0800 Subject: [PATCH 26/29] migration: Change SaveStateEntry.instance_id into uint32_t It was always used as 32bit, so define it as used to be clear. Instead of using -1 as the auto-gen magic value, we switch to UINT32_MAX. We also make sure that we don't auto-gen this value to avoid overflowed instance IDs without being noticed. Suggested-by: Juan Quintela Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- hw/intc/apic_common.c | 2 +- include/migration/register.h | 2 +- include/migration/vmstate.h | 2 +- migration/savevm.c | 18 ++++++++++-------- stubs/vmstate.c | 2 +- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index f2c3a7f309..54b8731fca 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -268,7 +268,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) APICCommonState *s = APIC_COMMON(dev); APICCommonClass *info; static DeviceState *vapic; - int instance_id = s->id; + uint32_t instance_id = s->id; info = APIC_COMMON_GET_CLASS(s); info->realize(dev, errp); diff --git a/include/migration/register.h b/include/migration/register.h index 00c38ebe9f..c1dcff0f90 100644 --- a/include/migration/register.h +++ b/include/migration/register.h @@ -71,7 +71,7 @@ typedef struct SaveVMHandlers { } SaveVMHandlers; int register_savevm_live(const char *idstr, - int instance_id, + uint32_t instance_id, int version_id, const SaveVMHandlers *ops, void *opaque); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index ed74dd5624..01790b8d9b 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -1160,7 +1160,7 @@ bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); #define VMSTATE_INSTANCE_ID_ANY -1 /* Returns: 0 on success, -1 on failure */ -int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id, +int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id, const VMStateDescription *vmsd, void *base, int alias_id, int required_for_version, diff --git a/migration/savevm.c b/migration/savevm.c index 8dab99efc4..adfdca26ac 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -233,7 +233,7 @@ typedef struct CompatEntry { typedef struct SaveStateEntry { QTAILQ_ENTRY(SaveStateEntry) entry; char idstr[256]; - int instance_id; + uint32_t instance_id; int alias_id; int version_id; /* version id read from the stream */ @@ -667,10 +667,10 @@ void dump_vmstate_json_to_file(FILE *out_file) fclose(out_file); } -static int calculate_new_instance_id(const char *idstr) +static uint32_t calculate_new_instance_id(const char *idstr) { SaveStateEntry *se; - int instance_id = 0; + uint32_t instance_id = 0; QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (strcmp(idstr, se->idstr) == 0 @@ -678,6 +678,8 @@ static int calculate_new_instance_id(const char *idstr) instance_id = se->instance_id + 1; } } + /* Make sure we never loop over without being noticed */ + assert(instance_id != VMSTATE_INSTANCE_ID_ANY); return instance_id; } @@ -755,7 +757,7 @@ static void savevm_state_handler_remove(SaveStateEntry *se) Meanwhile pass -1 as instance_id if you do not already have a clearly distinguishing id for all instances of your device class. */ int register_savevm_live(const char *idstr, - int instance_id, + uint32_t instance_id, int version_id, const SaveVMHandlers *ops, void *opaque) @@ -809,7 +811,7 @@ void unregister_savevm(VMStateIf *obj, const char *idstr, void *opaque) } } -int vmstate_register_with_alias_id(VMStateIf *obj, int instance_id, +int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id, const VMStateDescription *vmsd, void *opaque, int alias_id, int required_for_version, @@ -1625,7 +1627,7 @@ int qemu_save_device_state(QEMUFile *f) return qemu_file_get_error(f); } -static SaveStateEntry *find_se(const char *idstr, int instance_id) +static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) { SaveStateEntry *se; @@ -2292,7 +2294,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) /* Find savevm section */ se = find_se(idstr, instance_id); if (se == NULL) { - error_report("Unknown savevm section or instance '%s' %d. " + error_report("Unknown savevm section or instance '%s' %"PRIu32". " "Make sure that your current VM setup matches your " "saved VM setup, including any hotplugged devices", idstr, instance_id); @@ -2316,7 +2318,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) ret = vmstate_load(f, se); if (ret < 0) { - error_report("error while loading state for instance 0x%x of" + error_report("error while loading state for instance 0x%"PRIx32" of" " device '%s'", instance_id, idstr); return ret; } diff --git a/stubs/vmstate.c b/stubs/vmstate.c index 6951d9fdc5..cc4fe41dfc 100644 --- a/stubs/vmstate.c +++ b/stubs/vmstate.c @@ -4,7 +4,7 @@ const VMStateDescription vmstate_dummy = {}; int vmstate_register_with_alias_id(VMStateIf *obj, - int instance_id, + uint32_t instance_id, const VMStateDescription *vmsd, void *base, int alias_id, int required_for_version, From 0ab994867c365db21e15f9503922c79234d8e40e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 16 Oct 2019 10:29:32 +0800 Subject: [PATCH 27/29] apic: Use 32bit APIC ID for migration instance ID Migration is silently broken now with x2apic config like this: -smp 200,maxcpus=288,sockets=2,cores=72,threads=2 \ -device intel-iommu,intremap=on,eim=on After migration, the guest kernel could hang at anything, due to x2apic bit not migrated correctly in IA32_APIC_BASE on some vcpus, so any operations related to x2apic could be broken then (e.g., RDMSR on x2apic MSRs could fail because KVM would think that the vcpu hasn't enabled x2apic at all). The issue is that the x2apic bit was never applied correctly for vcpus whose ID > 255 when migrate completes, and that's because when we migrate APIC we use the APICCommonState.id as instance ID of the migration stream, while that's too short for x2apic. Let's use the newly introduced initial_apic_id for that. Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Reviewed-by: Eduardo Habkost Signed-off-by: Juan Quintela --- hw/intc/apic_common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 54b8731fca..b5dbeb6206 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -268,7 +268,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) APICCommonState *s = APIC_COMMON(dev); APICCommonClass *info; static DeviceState *vapic; - uint32_t instance_id = s->id; + uint32_t instance_id = s->initial_apic_id; + + /* Normally initial APIC ID should be no more than hundreds */ + assert(instance_id != VMSTATE_INSTANCE_ID_ANY); info = APIC_COMMON_GET_CLASS(s); info->realize(dev, errp); From 4746dbf8a98d560e20dbd22d0e8405b38478b409 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 13 Jan 2020 14:48:23 +0100 Subject: [PATCH 28/29] migration: Support QLIST migration Support QLIST migration using the same principle as QTAILQ: 94869d5c52 ("migration: migrate QTAILQ"). The VMSTATE_QLIST_V macro has the same proto as VMSTATE_QTAILQ_V. The change mainly resides in QLIST RAW macros: QLIST_RAW_INSERT_HEAD and QLIST_RAW_REVERSE. Tests also are provided. Signed-off-by: Eric Auger Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- include/migration/vmstate.h | 21 +++++ include/qemu/queue.h | 39 +++++++++ migration/trace-events | 5 ++ migration/vmstate-types.c | 70 +++++++++++++++ tests/test-vmstate.c | 170 ++++++++++++++++++++++++++++++++++++ 5 files changed, 305 insertions(+) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 01790b8d9b..30667631bc 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -229,6 +229,7 @@ extern const VMStateInfo vmstate_info_tmp; extern const VMStateInfo vmstate_info_bitmap; extern const VMStateInfo vmstate_info_qtailq; extern const VMStateInfo vmstate_info_gtree; +extern const VMStateInfo vmstate_info_qlist; #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0) /* @@ -798,6 +799,26 @@ extern const VMStateInfo vmstate_info_gtree; .offset = offsetof(_state, _field), \ } +/* + * For migrating a QLIST + * Target QLIST needs be properly initialized. + * _type: type of QLIST element + * _next: name of QLIST_ENTRY entry field in QLIST element + * _vmsd: VMSD for QLIST element + * size: size of QLIST element + * start: offset of QLIST_ENTRY in QTAILQ element + */ +#define VMSTATE_QLIST_V(_field, _state, _version, _vmsd, _type, _next) \ +{ \ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .vmsd = &(_vmsd), \ + .size = sizeof(_type), \ + .info = &vmstate_info_qlist, \ + .offset = offsetof(_state, _field), \ + .start = offsetof(_type, _next), \ +} + /* _f : field name _f_n : num of elements field_name _n : num of elements diff --git a/include/qemu/queue.h b/include/qemu/queue.h index 4764d93ea3..4d4554a7ce 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -501,4 +501,43 @@ union { \ QTAILQ_RAW_TQH_CIRC(head)->tql_prev = QTAILQ_RAW_TQE_CIRC(elm, entry); \ } while (/*CONSTCOND*/0) +#define QLIST_RAW_FIRST(head) \ + field_at_offset(head, 0, void *) + +#define QLIST_RAW_NEXT(elm, entry) \ + field_at_offset(elm, entry, void *) + +#define QLIST_RAW_PREVIOUS(elm, entry) \ + field_at_offset(elm, entry + sizeof(void *), void *) + +#define QLIST_RAW_FOREACH(elm, head, entry) \ + for ((elm) = *QLIST_RAW_FIRST(head); \ + (elm); \ + (elm) = *QLIST_RAW_NEXT(elm, entry)) + +#define QLIST_RAW_INSERT_HEAD(head, elm, entry) do { \ + void *first = *QLIST_RAW_FIRST(head); \ + *QLIST_RAW_FIRST(head) = elm; \ + *QLIST_RAW_PREVIOUS(elm, entry) = QLIST_RAW_FIRST(head); \ + if (first) { \ + *QLIST_RAW_NEXT(elm, entry) = first; \ + *QLIST_RAW_PREVIOUS(first, entry) = QLIST_RAW_NEXT(elm, entry); \ + } else { \ + *QLIST_RAW_NEXT(elm, entry) = NULL; \ + } \ +} while (0) + +#define QLIST_RAW_REVERSE(head, elm, entry) do { \ + void *iter = *QLIST_RAW_FIRST(head), *prev = NULL, *next; \ + while (iter) { \ + next = *QLIST_RAW_NEXT(iter, entry); \ + *QLIST_RAW_PREVIOUS(iter, entry) = QLIST_RAW_NEXT(next, entry); \ + *QLIST_RAW_NEXT(iter, entry) = prev; \ + prev = iter; \ + iter = next; \ + } \ + *QLIST_RAW_FIRST(head) = prev; \ + *QLIST_RAW_PREVIOUS(prev, entry) = QLIST_RAW_FIRST(head); \ +} while (0) + #endif /* QEMU_SYS_QUEUE_H */ diff --git a/migration/trace-events b/migration/trace-events index 2f9129e213..4ab0a503d2 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -76,6 +76,11 @@ get_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val put_gtree(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, uint32_t nnodes) "%s(%s/%s) nnodes=%d" put_gtree_end(const char *field_name, const char *key_vmsd_name, const char *val_vmsd_name, int ret) "%s(%s/%s) %d" +get_qlist(const char *field_name, const char *vmsd_name, int version_id) "%s(%s v%d)" +get_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" +put_qlist(const char *field_name, const char *vmsd_name, int version_id) "%s(%s v%d)" +put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" + # qemu-file.c qemu_file_fclose(void) "" diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c index 7236cf92bc..1eee36773a 100644 --- a/migration/vmstate-types.c +++ b/migration/vmstate-types.c @@ -843,3 +843,73 @@ const VMStateInfo vmstate_info_gtree = { .get = get_gtree, .put = put_gtree, }; + +static int put_qlist(QEMUFile *f, void *pv, size_t unused_size, + const VMStateField *field, QJSON *vmdesc) +{ + const VMStateDescription *vmsd = field->vmsd; + /* offset of the QTAILQ entry in a QTAILQ element*/ + size_t entry_offset = field->start; + void *elm; + int ret; + + trace_put_qlist(field->name, vmsd->name, vmsd->version_id); + QLIST_RAW_FOREACH(elm, pv, entry_offset) { + qemu_put_byte(f, true); + ret = vmstate_save_state(f, vmsd, elm, vmdesc); + if (ret) { + error_report("%s: failed to save %s (%d)", field->name, + vmsd->name, ret); + return ret; + } + } + qemu_put_byte(f, false); + trace_put_qlist_end(field->name, vmsd->name); + + return 0; +} + +static int get_qlist(QEMUFile *f, void *pv, size_t unused_size, + const VMStateField *field) +{ + int ret = 0; + const VMStateDescription *vmsd = field->vmsd; + /* size of a QLIST element */ + size_t size = field->size; + /* offset of the QLIST entry in a QLIST element */ + size_t entry_offset = field->start; + int version_id = field->version_id; + void *elm; + + trace_get_qlist(field->name, vmsd->name, vmsd->version_id); + if (version_id > vmsd->version_id) { + error_report("%s %s", vmsd->name, "too new"); + return -EINVAL; + } + if (version_id < vmsd->minimum_version_id) { + error_report("%s %s", vmsd->name, "too old"); + return -EINVAL; + } + + while (qemu_get_byte(f)) { + elm = g_malloc(size); + ret = vmstate_load_state(f, vmsd, elm, version_id); + if (ret) { + error_report("%s: failed to load %s (%d)", field->name, + vmsd->name, ret); + g_free(elm); + return ret; + } + QLIST_RAW_INSERT_HEAD(pv, elm, entry_offset); + } + QLIST_RAW_REVERSE(pv, elm, entry_offset); + trace_get_qlist_end(field->name, vmsd->name); + + return ret; +} + +const VMStateInfo vmstate_info_qlist = { + .name = "qlist", + .get = get_qlist, + .put = put_qlist, +}; diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c index 8f184f3556..cea363dd69 100644 --- a/tests/test-vmstate.c +++ b/tests/test-vmstate.c @@ -926,6 +926,28 @@ static const VMStateDescription vmstate_domain = { } }; +/* test QLIST Migration */ + +typedef struct TestQListElement { + uint32_t id; + QLIST_ENTRY(TestQListElement) next; +} TestQListElement; + +typedef struct TestQListContainer { + uint32_t id; + QLIST_HEAD(, TestQListElement) list; +} TestQListContainer; + +static const VMStateDescription vmstate_qlist_element = { + .name = "test/queue list", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(id, TestQListElement), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_iommu = { .name = "iommu", .version_id = 1, @@ -939,6 +961,18 @@ static const VMStateDescription vmstate_iommu = { } }; +static const VMStateDescription vmstate_container = { + .name = "test/container/qlist", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(id, TestQListContainer), + VMSTATE_QLIST_V(list, TestQListContainer, 1, vmstate_qlist_element, + TestQListElement, next), + VMSTATE_END_OF_LIST() + } +}; + uint8_t first_domain_dump[] = { /* id */ 0x00, 0x0, 0x0, 0x6, @@ -1229,6 +1263,140 @@ static void test_gtree_load_iommu(void) qemu_fclose(fload); } +static uint8_t qlist_dump[] = { + 0x00, 0x00, 0x00, 0x01, /* container id */ + 0x1, /* start of a */ + 0x00, 0x00, 0x00, 0x0a, + 0x1, /* start of b */ + 0x00, 0x00, 0x0b, 0x00, + 0x1, /* start of c */ + 0x00, 0x0c, 0x00, 0x00, + 0x1, /* start of d */ + 0x0d, 0x00, 0x00, 0x00, + 0x0, /* end of list */ + QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */ +}; + +static TestQListContainer *alloc_container(void) +{ + TestQListElement *a = g_malloc(sizeof(TestQListElement)); + TestQListElement *b = g_malloc(sizeof(TestQListElement)); + TestQListElement *c = g_malloc(sizeof(TestQListElement)); + TestQListElement *d = g_malloc(sizeof(TestQListElement)); + TestQListContainer *container = g_malloc(sizeof(TestQListContainer)); + + a->id = 0x0a; + b->id = 0x0b00; + c->id = 0xc0000; + d->id = 0xd000000; + container->id = 1; + + QLIST_INIT(&container->list); + QLIST_INSERT_HEAD(&container->list, d, next); + QLIST_INSERT_HEAD(&container->list, c, next); + QLIST_INSERT_HEAD(&container->list, b, next); + QLIST_INSERT_HEAD(&container->list, a, next); + return container; +} + +static void free_container(TestQListContainer *container) +{ + TestQListElement *iter, *tmp; + + QLIST_FOREACH_SAFE(iter, &container->list, next, tmp) { + QLIST_REMOVE(iter, next); + g_free(iter); + } + g_free(container); +} + +static void compare_containers(TestQListContainer *c1, TestQListContainer *c2) +{ + TestQListElement *first_item_c1, *first_item_c2; + + while (!QLIST_EMPTY(&c1->list)) { + first_item_c1 = QLIST_FIRST(&c1->list); + first_item_c2 = QLIST_FIRST(&c2->list); + assert(first_item_c2); + assert(first_item_c1->id == first_item_c2->id); + QLIST_REMOVE(first_item_c1, next); + QLIST_REMOVE(first_item_c2, next); + g_free(first_item_c1); + g_free(first_item_c2); + } + assert(QLIST_EMPTY(&c2->list)); +} + +/* + * Check the prev & next fields are correct by doing list + * manipulations on the container. We will do that for both + * the source and the destination containers + */ +static void manipulate_container(TestQListContainer *c) +{ + TestQListElement *prev = NULL, *iter = QLIST_FIRST(&c->list); + TestQListElement *elem; + + elem = g_malloc(sizeof(TestQListElement)); + elem->id = 0x12; + QLIST_INSERT_AFTER(iter, elem, next); + + elem = g_malloc(sizeof(TestQListElement)); + elem->id = 0x13; + QLIST_INSERT_HEAD(&c->list, elem, next); + + while (iter) { + prev = iter; + iter = QLIST_NEXT(iter, next); + } + + elem = g_malloc(sizeof(TestQListElement)); + elem->id = 0x14; + QLIST_INSERT_BEFORE(prev, elem, next); + + elem = g_malloc(sizeof(TestQListElement)); + elem->id = 0x15; + QLIST_INSERT_AFTER(prev, elem, next); + + QLIST_REMOVE(prev, next); + g_free(prev); +} + +static void test_save_qlist(void) +{ + TestQListContainer *container = alloc_container(); + + save_vmstate(&vmstate_container, container); + compare_vmstate(qlist_dump, sizeof(qlist_dump)); + free_container(container); +} + +static void test_load_qlist(void) +{ + QEMUFile *fsave, *fload; + TestQListContainer *orig_container = alloc_container(); + TestQListContainer *dest_container = g_malloc0(sizeof(TestQListContainer)); + char eof; + + QLIST_INIT(&dest_container->list); + + fsave = open_test_file(true); + qemu_put_buffer(fsave, qlist_dump, sizeof(qlist_dump)); + g_assert(!qemu_file_get_error(fsave)); + qemu_fclose(fsave); + + fload = open_test_file(false); + vmstate_load_state(fload, &vmstate_container, dest_container, 1); + eof = qemu_get_byte(fload); + g_assert(!qemu_file_get_error(fload)); + g_assert_cmpint(eof, ==, QEMU_VM_EOF); + manipulate_container(orig_container); + manipulate_container(dest_container); + compare_containers(orig_container, dest_container); + free_container(orig_container); + free_container(dest_container); +} + typedef struct TmpTestStruct { TestStruct *parent; int64_t diff; @@ -1353,6 +1521,8 @@ int main(int argc, char **argv) g_test_add_func("/vmstate/gtree/load/loaddomain", test_gtree_load_domain); g_test_add_func("/vmstate/gtree/save/saveiommu", test_gtree_save_iommu); g_test_add_func("/vmstate/gtree/load/loadiommu", test_gtree_load_iommu); + g_test_add_func("/vmstate/qlist/save/saveqlist", test_save_qlist); + g_test_add_func("/vmstate/qlist/load/loadqlist", test_load_qlist); g_test_add_func("/vmstate/tmp_struct", test_tmp_struct); g_test_run(); From ddac5cb2d95774cd019bfaf93c54ffd921095fea Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Tue, 14 Jan 2020 12:24:09 +0100 Subject: [PATCH 29/29] multifd: Be consistent about using uint64_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We transmit ram_addr_t always as uint64_t. Be consistent in its use (on 64bit system, it is always uint64_t problem is 32bits). Signed-off-by: Juan Quintela Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Dr. David Alan Gilbert --- migration/ram.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index d0940387d1..d2208b5534 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -803,7 +803,10 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) } for (i = 0; i < p->pages->used; i++) { - packet->offset[i] = cpu_to_be64(p->pages->offset[i]); + /* there are architectures where ram_addr_t is 32 bit */ + uint64_t temp = p->pages->offset[i]; + + packet->offset[i] = cpu_to_be64(temp); } } @@ -877,10 +880,10 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) } for (i = 0; i < p->pages->used; i++) { - ram_addr_t offset = be64_to_cpu(packet->offset[i]); + uint64_t offset = be64_to_cpu(packet->offset[i]); if (offset > (block->used_length - TARGET_PAGE_SIZE)) { - error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT + error_setg(errp, "multifd: offset too long %" PRIu64 " (max " RAM_ADDR_FMT ")", offset, block->max_length); return -1; @@ -1267,7 +1270,7 @@ int multifd_save_setup(void) p->id = i; p->pages = multifd_pages_init(page_count); p->packet_len = sizeof(MultiFDPacket_t) - + sizeof(ram_addr_t) * page_count; + + sizeof(uint64_t) * page_count; p->packet = g_malloc0(p->packet_len); p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); p->packet->version = cpu_to_be32(MULTIFD_VERSION); @@ -1484,7 +1487,7 @@ int multifd_load_setup(void) p->id = i; p->pages = multifd_pages_init(page_count); p->packet_len = sizeof(MultiFDPacket_t) - + sizeof(ram_addr_t) * page_count; + + sizeof(uint64_t) * page_count; p->packet = g_malloc0(p->packet_len); p->name = g_strdup_printf("multifdrecv_%d", i); }