diff --git a/migration/migration.c b/migration/migration.c index 9e9db26667..0bf70ea971 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -402,10 +402,23 @@ void migration_incoming_state_destroy(void) struct MigrationIncomingState *mis = migration_incoming_get_current(); multifd_recv_cleanup(); + /* * RAM state cleanup needs to happen after multifd cleanup, because * multifd threads can use some of its states (receivedmap). + * The VFIO load_cleanup() implementation is BQL-sensitive. It requires + * BQL must NOT be taken when recycling load threads, so that it won't + * block the load threads from making progress on address space + * modification operations. + * + * To make it work, we could try to not take BQL for all load_cleanup(), + * or conditionally unlock BQL only if bql_locked() in VFIO. + * + * Since most existing call sites take BQL for load_cleanup(), make + * it simple by taking BQL always as the rule, so that VFIO can unlock + * BQL and retake unconditionally. */ + assert(bql_locked()); qemu_loadvm_state_cleanup(); if (mis->to_src_file) { diff --git a/migration/savevm.c b/migration/savevm.c index 7c1aa8ad7b..3e86b572cf 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1986,6 +1986,8 @@ static void *postcopy_ram_listen_thread(void *opaque) * in qemu_file, and thus we must be blocking now. */ qemu_file_set_blocking(f, true); + + /* TODO: sanity check that only postcopiable data will be loaded here */ load_res = qemu_loadvm_state_main(f, mis); /* @@ -2046,7 +2048,9 @@ static void *postcopy_ram_listen_thread(void *opaque) * (If something broke then qemu will have to exit anyway since it's * got a bad migration state). */ + bql_lock(); migration_incoming_state_destroy(); + bql_unlock(); rcu_unregister_thread(); mis->have_listen_thread = false;