diff --git a/include/migration/misc.h b/include/migration/misc.h
index c0e23fdac9..67f7ef7a0e 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -104,4 +104,8 @@ bool migration_incoming_postcopy_advised(void);
 /* True if background snapshot is active */
 bool migration_in_bg_snapshot(void);
 
+/* Wrappers for block active/inactive operations */
+bool migration_block_activate(Error **errp);
+bool migration_block_inactivate(void);
+
 #endif
diff --git a/migration/block-active.c b/migration/block-active.c
new file mode 100644
index 0000000000..d477cf8182
--- /dev/null
+++ b/migration/block-active.c
@@ -0,0 +1,94 @@
+/*
+ * Block activation tracking for migration purposes
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2024 Red Hat, Inc.
+ */
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "qapi/error.h"
+#include "migration/migration.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+/*
+ * Migration-only cache to remember the block layer activation status.
+ * Protected by BQL.
+ *
+ * We need this because:
+ *
+ * - Migration can fail after block devices are invalidated (during
+ *   switchover phase). When that happens, we need to be able to recover
+ *   the block drive status by re-activating them.
+ *
+ * - Currently bdrv_inactivate_all() is not safe to invoke on top of
+ *   invalidated drives (even though bdrv_activate_all() is safe to call
+ *   at any time!). Remembering the state lets migration make sure it
+ *   won't invalidate twice in a row, which would crash QEMU. That can
+ *   happen when we migrate a PAUSED VM from host1 to host2, then migrate
+ *   again to host3 without starting it. TODO: a cleaner solution is to
+ *   allow safe invocation of bdrv_inactivate_all() at any time, like
+ *   bdrv_activate_all().
+ *
+ * For a freshly started QEMU, the flag is initialized to TRUE, reflecting
+ * that QEMU owns the block devices.
+ *
+ * For an incoming QEMU taking a migration stream, the flag is initialized
+ * to FALSE, reflecting that the incoming side doesn't own the block
+ * devices until switchover happens.
+ */
+static bool migration_block_active;
+
+/* Set up the disk activation status */
+void migration_block_active_setup(bool active)
+{
+    migration_block_active = active;
+}
+
+bool migration_block_activate(Error **errp)
+{
+    ERRP_GUARD();
+
+    assert(bql_locked());
+
+    if (migration_block_active) {
+        trace_migration_block_activation("active-skipped");
+        return true;
+    }
+
+    trace_migration_block_activation("active");
+
+    bdrv_activate_all(errp);
+    if (*errp) {
+        error_report_err(error_copy(*errp));
+        return false;
+    }
+
+    migration_block_active = true;
+    return true;
+}
+
+bool migration_block_inactivate(void)
+{
+    int ret;
+
+    assert(bql_locked());
+
+    if (!migration_block_active) {
+        trace_migration_block_activation("inactive-skipped");
+        return true;
+    }
+
+    trace_migration_block_activation("inactive");
+
+    ret = bdrv_inactivate_all();
+    if (ret) {
+        error_report("%s: bdrv_inactivate_all() failed: %d",
+                     __func__, ret);
+        return false;
+    }
+
+    migration_block_active = false;
+    return true;
+}
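
Both wrappers are idempotent under the BQL, so call sites no longer need to
track inactivation state themselves. Below is a minimal sketch of the
intended calling pattern; everything named example_*() is hypothetical, and
only the two migration_block_*() calls come from this patch:

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "migration/misc.h"

    /* Hypothetical stand-in for the device-state transfer. */
    static int example_send_device_state(void)
    {
        return 0;
    }

    /* Illustrative switchover helper, not part of the patch. */
    static int example_switchover(Error **errp)
    {
        /* Hand disk ownership to the destination; no-op if already inactive. */
        if (!migration_block_inactivate()) {
            error_setg(errp, "failed to inactivate block devices");
            return -1;
        }

        if (example_send_device_state() < 0) {
            /*
             * Recovery is safe to attempt unconditionally: the wrapper
             * only re-activates when its flag says the disks are inactive.
             */
            migration_block_activate(NULL);
            return -1;
        }

        return 0;
    }
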
diff --git a/migration/colo.c b/migration/colo.c
index afc9869020..9a8e5fbe9b 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -836,7 +836,7 @@ static void *colo_process_incoming_thread(void *opaque)
 
     /* Make sure all file formats throw away their mutable metadata */
     bql_lock();
-    bdrv_activate_all(&local_err);
+    migration_block_activate(&local_err);
     bql_unlock();
     if (local_err) {
         error_report_err(local_err);
diff --git a/migration/meson.build b/migration/meson.build
index d53cf3417a..dac687ee3a 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -11,6 +11,7 @@ migration_files = files(
 
 system_ss.add(files(
   'block-dirty-bitmap.c',
+  'block-active.c',
   'channel.c',
   'channel-block.c',
   'cpu-throttle.c',
diff --git a/migration/migration.c b/migration/migration.c
index fd42a549e6..2d1da917c7 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -738,7 +738,6 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
 
 static void process_incoming_migration_bh(void *opaque)
 {
-    Error *local_err = NULL;
     MigrationIncomingState *mis = opaque;
 
     trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter");
@@ -769,11 +768,7 @@ static void process_incoming_migration_bh(void *opaque)
          * Make sure all file formats throw away their mutable
          * metadata. If error, don't restart the VM yet.
          */
-        bdrv_activate_all(&local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            local_err = NULL;
-        } else {
+        if (migration_block_activate(NULL)) {
             vm_start();
         }
     } else {
@@ -1560,16 +1555,6 @@ static void migrate_fd_cancel(MigrationState *s)
             }
         }
     }
-    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
-        Error *local_err = NULL;
-
-        bdrv_activate_all(&local_err);
-        if (local_err) {
-            error_report_err(local_err);
-        } else {
-            s->block_inactive = false;
-        }
-    }
 }
 
 void migration_add_notifier_mode(NotifierWithReturn *notify,
@@ -1853,6 +1838,12 @@ void qmp_migrate_incoming(const char *uri, bool has_channels,
         return;
     }
 
+    /*
+     * Newly set up incoming QEMU. Mark the block active state to reflect
+     * that the src currently owns the disks.
+     */
+    migration_block_active_setup(false);
+
     once = false;
 }
 
@@ -2505,7 +2496,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
     QIOChannelBuffer *bioc;
     QEMUFile *fb;
     uint64_t bandwidth = migrate_max_postcopy_bandwidth();
-    bool restart_block = false;
     int cur_state = MIGRATION_STATUS_ACTIVE;
 
     if (migrate_postcopy_preempt()) {
@@ -2541,13 +2531,10 @@ static int postcopy_start(MigrationState *ms, Error **errp)
         goto fail;
     }
 
-    ret = bdrv_inactivate_all();
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "%s: Failed in bdrv_inactivate_all()",
-                         __func__);
+    if (!migration_block_inactivate()) {
+        error_setg(errp, "%s: Failed in bdrv_inactivate_all()", __func__);
         goto fail;
     }
-    restart_block = true;
 
     /*
      * Cause any non-postcopiable, but iterative devices to
@@ -2617,8 +2604,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
         goto fail_closefb;
     }
 
-    restart_block = false;
-
     /* Now send that blob */
     if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
         error_setg(errp, "%s: Failed to send packaged data", __func__);
@@ -2663,17 +2648,7 @@ fail_closefb:
 fail:
     migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                       MIGRATION_STATUS_FAILED);
-    if (restart_block) {
-        /* A failure happened early enough that we know the destination hasn't
-         * accessed block devices, so we're safe to recover.
-         */
-        Error *local_err = NULL;
-
-        bdrv_activate_all(&local_err);
-        if (local_err) {
-            error_report_err(local_err);
-        }
-    }
+    migration_block_activate(NULL);
     migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL);
     bql_unlock();
     return -1;
@@ -2771,31 +2746,6 @@ static void migration_completion_postcopy(MigrationState *s)
     trace_migration_completion_postcopy_end_after_complete();
 }
 
-static void migration_completion_failed(MigrationState *s,
-                                        int current_active_state)
-{
-    if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
-                              s->state == MIGRATION_STATUS_DEVICE)) {
-        /*
-         * If not doing postcopy, vm_start() will be called: let's
-         * regain control on images.
-         */
-        Error *local_err = NULL;
-
-        bql_lock();
-        bdrv_activate_all(&local_err);
-        if (local_err) {
-            error_report_err(local_err);
-        } else {
-            s->block_inactive = false;
-        }
-        bql_unlock();
-    }
-
-    migrate_set_state(&s->state, current_active_state,
-                      MIGRATION_STATUS_FAILED);
-}
-
 /**
  * migration_completion: Used by migration_thread when there's not much left.
  * The caller 'breaks' the loop when this returns.
@@ -2849,7 +2799,8 @@ fail:
         error_free(local_err);
     }
 
-    migration_completion_failed(s, current_active_state);
+    migrate_set_state(&s->state, current_active_state,
+                      MIGRATION_STATUS_FAILED);
 }
 
 /**
@@ -3279,6 +3230,11 @@ static void migration_iteration_finish(MigrationState *s)
     case MIGRATION_STATUS_FAILED:
     case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
+        /*
+         * Re-activate the block drives if they're inactivated. Note, COLO
+         * shouldn't use block_active at all, so this should be a no-op there.
+         */
+        migration_block_activate(NULL);
         if (runstate_is_live(s->vm_old_state)) {
             if (!runstate_check(RUN_STATE_SHUTDOWN)) {
                 vm_start();
@@ -3852,6 +3808,8 @@ static void migration_instance_init(Object *obj)
     ms->state = MIGRATION_STATUS_NONE;
     ms->mbps = -1;
     ms->pages_per_second = -1;
+    /* Freshly started QEMU owns all the block devices */
+    migration_block_active_setup(true);
     qemu_sem_init(&ms->pause_sem, 0);
     qemu_mutex_init(&ms->error_mutex);
 
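
With the per-path recovery code above consolidated into plain
migration_block_activate(NULL) calls, the flag's main job is making repeated
transitions harmless. A sketch of the resulting idempotency, assuming the
BQL framing shown in the call sites above (the quoted strings are the trace
names defined in block-active.c):

    bql_lock();
    migration_block_inactivate();    /* active -> inactive */
    migration_block_inactivate();    /* "inactive-skipped": no second
                                      * bdrv_inactivate_all(), which would
                                      * have crashed pre-patch */
    migration_block_activate(NULL);  /* inactive -> active */
    migration_block_activate(NULL);  /* "active-skipped": no-op */
    bql_unlock();
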
diff --git a/migration/migration.h b/migration/migration.h
index 7b6e718690..0df2a187af 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -370,9 +370,6 @@ struct MigrationState {
     /* Flag set once the migration thread is running (and needs joining) */
     bool migration_thread_running;
 
-    /* Flag set once the migration thread called bdrv_inactivate_all */
-    bool block_inactive;
-
     /* Migration is waiting for guest to unplug device */
     QemuSemaphore wait_unplug_sem;
 
@@ -556,4 +553,7 @@ void migration_bitmap_sync_precopy(bool last_stage);
 /* migration/block-dirty-bitmap.c */
 void dirty_bitmap_mig_init(void);
 
+/* migration/block-active.c */
+void migration_block_active_setup(bool active);
+
 #endif
diff --git a/migration/savevm.c b/migration/savevm.c
index 3a414aff52..c929da1ca5 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1547,19 +1547,18 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
     }
 
     if (inactivate_disks) {
-        /* Inactivate before sending QEMU_VM_EOF so that the
-         * bdrv_activate_all() on the other end won't fail. */
-        ret = bdrv_inactivate_all();
-        if (ret) {
-            error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
-                       __func__, ret);
+        /*
+         * Inactivate before sending QEMU_VM_EOF so that the
+         * bdrv_activate_all() on the other end won't fail.
+         */
+        if (!migration_block_inactivate()) {
+            error_setg(&local_err, "%s: bdrv_inactivate_all() failed",
+                       __func__);
             migrate_set_error(ms, local_err);
             error_report_err(local_err);
-            qemu_file_set_error(f, ret);
-            return ret;
+            qemu_file_set_error(f, -EFAULT);
+            return -EFAULT;
         }
-        /* Remember that we did this */
-        s->block_inactive = true;
     }
     if (!in_postcopy) {
         /* Postcopy stream will still be going */
@@ -2123,7 +2122,6 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
 
 static void loadvm_postcopy_handle_run_bh(void *opaque)
 {
-    Error *local_err = NULL;
     MigrationIncomingState *mis = opaque;
 
     trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter");
@@ -2146,12 +2144,11 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
          * Make sure all file formats throw away their mutable metadata.
          * If we get an error here, just don't restart the VM yet.
          */
-        bdrv_activate_all(&local_err);
+        bool success = migration_block_activate(NULL);
+
         trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");
-        if (local_err) {
-            error_report_err(local_err);
-            local_err = NULL;
-        } else {
+
+        if (success) {
             vm_start();
         }
     } else {
@@ -3193,11 +3190,7 @@ void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
      * side of the migration take control of the images.
      */
     if (live && !saved_vm_running) {
-        ret = bdrv_inactivate_all();
-        if (ret) {
-            error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
-                       __func__, ret);
-        }
+        migration_block_inactivate();
     }
 }
 
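
The ordering comment in qemu_savevm_state_complete_precopy_non_iterable() is
the heart of switchover. Spelled out as a sequence, as an illustrative
summary of the call sites touched by this patch rather than code:

    /*
     * source                              destination
     * ------                              -----------
     * migration_block_inactivate()
     * qemu_put_byte(f, QEMU_VM_EOF)  -->  stream ends
     *                                     process_incoming_migration_bh()
     *                                       migration_block_activate(NULL)
     *                                       vm_start()
     *
     * Inactivating only after QEMU_VM_EOF would let the destination run
     * bdrv_activate_all() while the source still holds the image locks.
     */
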
diff --git a/migration/trace-events b/migration/trace-events
index bb0e0cc6dc..b82a1c5e40 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -383,3 +383,6 @@ migration_pagecache_insert(void) "Error allocating page"
 # cpu-throttle.c
 cpu_throttle_set(int new_throttle_pct) "set guest CPU throttled by %d%%"
 cpu_throttle_dirty_sync(void) ""
+
+# block-active.c
+migration_block_activation(const char *name) "%s"
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index 415e645130..1ca44fbd72 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -31,6 +31,7 @@
 #include "qapi/type-helpers.h"
 #include "hw/mem/memory-device.h"
 #include "hw/intc/intc.h"
+#include "migration/misc.h"
 
 NameInfo *qmp_query_name(Error **errp)
 {
@@ -103,13 +104,8 @@ void qmp_cont(Error **errp)
      * Continuing after completed migration. Images have been
      * inactivated to allow the destination to take control. Need to
      * get control back now.
-     *
-     * If there are no inactive block nodes (e.g. because the VM was
-     * just paused rather than completing a migration),
-     * bdrv_inactivate_all() simply doesn't do anything.
      */
-    bdrv_activate_all(&local_err);
-    if (local_err) {
+    if (!migration_block_activate(&local_err)) {
         error_propagate(errp, local_err);
         return;
     }