commit 3214bec13d
Merge tag 'migration-20250110-pull-request' of https://gitlab.com/farosas/qemu into staging

Migration pull request

- compression: Shameer's fix for CONFIG_UADK build
               Yuan Liu fixes for zero-page, QPL, qatzip
- multifd sync cleanups, prereq. for VFIO and postcopy work
- fixes for 9.2 regressions:
    multifd with pre-9.0 -> post-9.1 migrations (#2720)
    s390x migration (#2704)
- fix for assertions during paused migrations; rework of
  late-block-activate logic (#2395, #686)
- fixes for compressed arrays creation and parsing, mostly affecting
  s390x

# -----BEGIN PGP SIGNATURE-----
#
# iQJEBAABCAAuFiEEqhtIsKIjJqWkw2TPx5jcdBvsMZ0FAmeBDgkQHGZhcm9zYXNA
# c3VzZS5kZQAKCRDHmNx0G+wxnSlUEACl31wY+77JxWnBva/eDDwnJ9HiCrqsoqaZ
# YIJJXNlk4lYJWNdZRt6p27exzWrQwm+kWKPECeCakgCMlfhnKCvejGq7iV/fJY4o
# D8hjE3t1htQ8mfblY1+bqzg3Rml59KwXxiqAwvlljbNWdkXruv026dq9vgJMzFhi
# ia043fOO1tYULIoawgmwmLEHnztht0v+ZTZ1v5KQbrH655tpxls/8kHc6v5PXEpA
# 3PSmCrCQh1dPtkYRjuJ9yHyfU+/T8tYwIjrU6VR1wQW7MBNkjtqNudaqAFiuyuqn
# P8gh4rAQrMhA9y+aq6xSoJP8XGkuOHxLQtlNutlmtbcQyZ7JqgLmK9ZLdoPf21sK
# //erV63NoyaciYB9Nk3NXflwroc6zyvo8A584kGNPwBznZOJLESP4SPvVm/nlE29
# vbyq8AWHRjFiqqf6P0ttQLAFkusZJzM1Y9UakF51hyVBX70yfqLG20XXZtIq/aZA
# GbBB2Fo0MIlbmWaur3vLsSzn7B8d++Gl9TTGcK/eIXJ1ANCuCxGv9fbXJQlP5F4I
# 3OAoSmAVJ2eqw4v0+2WMiEa8yUA5drNnDSI3VRkG+0K9jRfHKXki466/QQdGrNw7
# 8GuuzLBNai3gEKbavDU0Be73r982KjXeYXj7RuAkQfm0d4H7tiwtg91Cd1dPKfzh
# mhpmOFJDCg==
# =joNM
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 10 Jan 2025 07:09:45 EST
# gpg:                using RSA key AA1B48B0A22326A5A4C364CFC798DC741BEC319D
# gpg:                issuer "farosas@suse.de"
# gpg: Good signature from "Fabiano Rosas <farosas@suse.de>" [unknown]
# gpg:                 aka "Fabiano Almeida Rosas <fabiano.rosas@suse.com>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: AA1B 48B0 A223 26A5 A4C3 64CF C798 DC74 1BEC 319D

* tag 'migration-20250110-pull-request' of https://gitlab.com/farosas/qemu: (25 commits)
  multifd: bugfix for incorrect migration data with qatzip compression
  multifd: bugfix for incorrect migration data with QPL compression
  multifd: bugfix for migration using compression methods
  s390x: Fix CSS migration
  migration: Fix arrays of pointers in JSON writer
  migration: Dump correct JSON format for nullptr replacement
  migration: Rename vmstate_info_nullptr
  migration: Fix parsing of s390 stream
  migration: Remove unused argument in vmsd_desc_field_end
  migration: Add more error handling to analyze-migration.py
  migration/block: Rewrite disk activation
  migration/block: Fix possible race with block_inactive
  migration/block: Apply late-block-active behavior to postcopy
  migration/block: Make late-block-active the default
  qmp/cont: Only activate disks if migration completed
  migration: Add helper to get target runstate
  migration/multifd: Fix compat with QEMU < 9.0
  migration/multifd: Document the reason to sync for save_setup()
  migration/multifd: Cleanup src flushes on condition check
  migration/multifd: Remove sync processing on postcopy
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
@@ -1244,6 +1244,7 @@ static void ccw_machine_2_9_instance_options(MachineState *machine)
     s390_cpudef_featoff_greater(12, 1, S390_FEAT_ZPCI);
     s390_cpudef_featoff_greater(12, 1, S390_FEAT_ADAPTER_INT_SUPPRESSION);
     s390_cpudef_featoff_greater(12, 1, S390_FEAT_ADAPTER_EVENT_NOTIFICATION);
+    css_migration_enabled = false;
 }
 
 static void ccw_machine_2_9_class_options(MachineClass *mc)
@@ -1256,7 +1257,6 @@ static void ccw_machine_2_9_class_options(MachineClass *mc)
     ccw_machine_2_10_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_2_9, hw_compat_2_9_len);
     compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
-    css_migration_enabled = false;
 }
 DEFINE_CCW_MACHINE(2, 9);
 
@@ -104,4 +104,8 @@ bool migration_incoming_postcopy_advised(void);
 /* True if background snapshot is active */
 bool migration_in_bg_snapshot(void);
 
+/* Wrapper for block active/inactive operations */
+bool migration_block_activate(Error **errp);
+bool migration_block_inactivate(void);
+
 #endif
migration/block-active.c (new file, 94 lines)
@@ -0,0 +1,94 @@
+/*
+ * Block activation tracking for migration purpose
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2024 Red Hat, Inc.
+ */
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "qapi/error.h"
+#include "migration/migration.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+
+/*
+ * Migration-only cache to remember the block layer activation status.
+ * Protected by BQL.
+ *
+ * We need this because..
+ *
+ * - Migration can fail after block devices are invalidated (during
+ *   switchover phase). When that happens, we need to be able to recover
+ *   the block drive status by re-activating them.
+ *
+ * - Currently bdrv_inactivate_all() is not safe to be invoked on top of
+ *   invalidated drives (even if bdrv_activate_all() is actually safe to be
+ *   called any time!). It means remembering this could help migration to
+ *   make sure it won't invalidate twice in a row, crashing QEMU. It can
+ *   happen when we migrate a PAUSED VM from host1 to host2, then migrate
+ *   again to host3 without starting it. TODO: a cleaner solution is to
+ *   allow safe invoke of bdrv_inactivate_all() at anytime, like
+ *   bdrv_activate_all().
+ *
+ * For freshly started QEMU, the flag is initialized to TRUE reflecting the
+ * scenario where QEMU owns block device ownerships.
+ *
+ * For incoming QEMU taking a migration stream, the flag is initialized to
+ * FALSE reflecting that the incoming side doesn't own the block devices,
+ * not until switchover happens.
+ */
+static bool migration_block_active;
+
+/* Setup the disk activation status */
+void migration_block_active_setup(bool active)
+{
+    migration_block_active = active;
+}
+
+bool migration_block_activate(Error **errp)
+{
+    ERRP_GUARD();
+
+    assert(bql_locked());
+
+    if (migration_block_active) {
+        trace_migration_block_activation("active-skipped");
+        return true;
+    }
+
+    trace_migration_block_activation("active");
+
+    bdrv_activate_all(errp);
+    if (*errp) {
+        error_report_err(error_copy(*errp));
+        return false;
+    }
+
+    migration_block_active = true;
+    return true;
+}
+
+bool migration_block_inactivate(void)
+{
+    int ret;
+
+    assert(bql_locked());
+
+    if (!migration_block_active) {
+        trace_migration_block_activation("inactive-skipped");
+        return true;
+    }
+
+    trace_migration_block_activation("inactive");
+
+    ret = bdrv_inactivate_all();
+    if (ret) {
+        error_report("%s: bdrv_inactivate_all() failed: %d",
+                     __func__, ret);
+        return false;
+    }
+
+    migration_block_active = false;
+    return true;
+}
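The cache above mainly exists to make a second inactivation request a safe
no-op. A minimal standalone model of that behavior (plain C, not QEMU code;
names are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /* Fresh QEMU owns the disks; an incoming QEMU would start with false. */
    static bool block_active = true;

    static void inactivate(void)
    {
        if (!block_active) {
            puts("inactive-skipped");   /* repeated call: no-op, no crash */
            return;
        }
        puts("bdrv_inactivate_all()");
        block_active = false;
    }

    int main(void)
    {
        inactivate();   /* migrate a PAUSED VM: host1 -> host2 */
        inactivate();   /* migrate again to host3: previously a second
                         * bdrv_inactivate_all(), now skipped */
        return 0;
    }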
@@ -836,7 +836,7 @@ static void *colo_process_incoming_thread(void *opaque)
 
     /* Make sure all file formats throw away their mutable metadata */
     bql_lock();
-    bdrv_activate_all(&local_err);
+    migration_block_activate(&local_err);
     bql_unlock();
     if (local_err) {
         error_report_err(local_err);
@@ -11,6 +11,7 @@ migration_files = files(
 
 system_ss.add(files(
   'block-dirty-bitmap.c',
+  'block-active.c',
   'channel.c',
   'channel-block.c',
   'cpu-throttle.c',
@@ -135,6 +135,21 @@ static bool migration_needs_multiple_sockets(void)
     return migrate_multifd() || migrate_postcopy_preempt();
 }
 
+static RunState migration_get_target_runstate(void)
+{
+    /*
+     * When the global state is not migrated, it means we don't know the
+     * runstate of the src QEMU.  We don't have much choice but assuming
+     * the VM is running.  NOTE: this is pretty rare case, so far only Xen
+     * uses it.
+     */
+    if (!global_state_received()) {
+        return RUN_STATE_RUNNING;
+    }
+
+    return global_state_get_runstate();
+}
+
 static bool transport_supports_multi_channels(MigrationAddress *addr)
 {
     if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
@@ -723,30 +738,10 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
 
 static void process_incoming_migration_bh(void *opaque)
 {
-    Error *local_err = NULL;
     MigrationIncomingState *mis = opaque;
 
     trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter");
 
-    /* If capability late_block_activate is set:
-     * Only fire up the block code now if we're going to restart the
-     * VM, else 'cont' will do it.
-     * This causes file locking to happen; so we don't want it to happen
-     * unless we really are starting the VM.
-     */
-    if (!migrate_late_block_activate() ||
-         (autostart && (!global_state_received() ||
-            runstate_is_live(global_state_get_runstate())))) {
-        /* Make sure all file formats throw away their mutable metadata.
-         * If we get an error here, just don't restart the VM yet. */
-        bdrv_activate_all(&local_err);
-        if (local_err) {
-            error_report_err(local_err);
-            local_err = NULL;
-            autostart = false;
-        }
-    }
-
     /*
      * This must happen after all error conditions are dealt with and
      * we're sure the VM is going to be running on this host.
@@ -759,10 +754,23 @@ static void process_incoming_migration_bh(void *opaque)
 
     dirty_bitmap_mig_before_vm_start();
 
-    if (!global_state_received() ||
-        runstate_is_live(global_state_get_runstate())) {
+    if (runstate_is_live(migration_get_target_runstate())) {
         if (autostart) {
-            vm_start();
+            /*
+             * Block activation is always delayed until VM starts, either
+             * here (which means we need to start the dest VM right now..),
+             * or until qmp_cont() later.
+             *
+             * We used to have cap 'late-block-activate' but now we do this
+             * unconditionally, as it has no harm but only benefit.  E.g.,
+             * it's not part of migration ABI on the time of disk activation.
+             *
+             * Make sure all file formats throw away their mutable
+             * metadata.  If error, don't restart the VM yet.
+             */
+            if (migration_block_activate(NULL)) {
+                vm_start();
+            }
         } else {
             runstate_set(RUN_STATE_PAUSED);
         }
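A condensed standalone model (plain C, not QEMU code) of the destination-side
decision this hunk implements: disks are activated only when the VM is about
to run here, otherwise activation is left for a later 'cont':

    #include <stdbool.h>
    #include <stdio.h>

    enum runstate { RUNNING, PAUSED };

    static bool block_activate(void)
    {
        puts("activate disks");
        return true;
    }

    static void incoming_bh(enum runstate target, bool autostart)
    {
        if (target == RUNNING && autostart) {
            if (block_activate()) {     /* on error: stay paused */
                puts("vm_start");
            }
        } else {
            puts("stay paused; a later 'cont' activates the disks");
        }
    }

    int main(void)
    {
        incoming_bh(RUNNING, true);    /* normal live migration */
        incoming_bh(RUNNING, false);   /* dest started with -S */
        incoming_bh(PAUSED, true);     /* src VM was paused */
        return 0;
    }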
@@ -1547,16 +1555,6 @@ static void migrate_fd_cancel(MigrationState *s)
             }
         }
     }
-    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
-        Error *local_err = NULL;
-
-        bdrv_activate_all(&local_err);
-        if (local_err) {
-            error_report_err(local_err);
-        } else {
-            s->block_inactive = false;
-        }
-    }
 }
 
 void migration_add_notifier_mode(NotifierWithReturn *notify,
@@ -1840,6 +1838,12 @@ void qmp_migrate_incoming(const char *uri, bool has_channels,
         return;
     }
 
+    /*
+     * Newly setup incoming QEMU.  Mark the block active state to reflect
+     * that the src currently owns the disks.
+     */
+    migration_block_active_setup(false);
+
     once = false;
 }
 
@@ -2492,7 +2496,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
     QIOChannelBuffer *bioc;
     QEMUFile *fb;
     uint64_t bandwidth = migrate_max_postcopy_bandwidth();
-    bool restart_block = false;
     int cur_state = MIGRATION_STATUS_ACTIVE;
 
     if (migrate_postcopy_preempt()) {
@@ -2528,13 +2531,10 @@ static int postcopy_start(MigrationState *ms, Error **errp)
         goto fail;
     }
 
-    ret = bdrv_inactivate_all();
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "%s: Failed in bdrv_inactivate_all()",
-                         __func__);
+    if (!migration_block_inactivate()) {
+        error_setg(errp, "%s: Failed in bdrv_inactivate_all()", __func__);
         goto fail;
     }
-    restart_block = true;
 
     /*
      * Cause any non-postcopiable, but iterative devices to
@@ -2604,8 +2604,6 @@ static int postcopy_start(MigrationState *ms, Error **errp)
         goto fail_closefb;
     }
 
-    restart_block = false;
-
     /* Now send that blob */
     if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
         error_setg(errp, "%s: Failed to send packaged data", __func__);
@@ -2650,17 +2648,7 @@ fail_closefb:
 fail:
     migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                       MIGRATION_STATUS_FAILED);
-    if (restart_block) {
-        /* A failure happened early enough that we know the destination hasn't
-         * accessed block devices, so we're safe to recover.
-         */
-        Error *local_err = NULL;
-
-        bdrv_activate_all(&local_err);
-        if (local_err) {
-            error_report_err(local_err);
-        }
-    }
+    migration_block_activate(NULL);
     migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL);
     bql_unlock();
     return -1;
@@ -2729,14 +2717,11 @@ static int migration_completion_precopy(MigrationState *s,
         goto out_unlock;
     }
 
-    /*
-     * Inactivate disks except in COLO, and track that we have done so in order
-     * to remember to reactivate them if migration fails or is cancelled.
-     */
-    s->block_inactive = !migrate_colo();
     migration_rate_set(RATE_LIMIT_DISABLED);
 
+    /* Inactivate disks except in COLO */
     ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
-                                             s->block_inactive);
+                                             !migrate_colo());
 out_unlock:
     bql_unlock();
     return ret;
@@ -2761,31 +2746,6 @@ static void migration_completion_postcopy(MigrationState *s)
     trace_migration_completion_postcopy_end_after_complete();
 }
 
-static void migration_completion_failed(MigrationState *s,
-                                        int current_active_state)
-{
-    if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
-                              s->state == MIGRATION_STATUS_DEVICE)) {
-        /*
-         * If not doing postcopy, vm_start() will be called: let's
-         * regain control on images.
-         */
-        Error *local_err = NULL;
-
-        bql_lock();
-        bdrv_activate_all(&local_err);
-        if (local_err) {
-            error_report_err(local_err);
-        } else {
-            s->block_inactive = false;
-        }
-        bql_unlock();
-    }
-
-    migrate_set_state(&s->state, current_active_state,
-                      MIGRATION_STATUS_FAILED);
-}
-
 /**
  * migration_completion: Used by migration_thread when there's not much left.
  * The caller 'breaks' the loop when this returns.
@@ -2839,7 +2799,8 @@ fail:
         error_free(local_err);
     }
 
-    migration_completion_failed(s, current_active_state);
+    migrate_set_state(&s->state, current_active_state,
+                      MIGRATION_STATUS_FAILED);
 }
 
 /**
@@ -3269,6 +3230,11 @@ static void migration_iteration_finish(MigrationState *s)
     case MIGRATION_STATUS_FAILED:
     case MIGRATION_STATUS_CANCELLED:
     case MIGRATION_STATUS_CANCELLING:
+        /*
+         * Re-activate the block drives if they're inactivated.  Note, COLO
+         * shouldn't use block_active at all, so it should be no-op there.
+         */
+        migration_block_activate(NULL);
         if (runstate_is_live(s->vm_old_state)) {
             if (!runstate_check(RUN_STATE_SHUTDOWN)) {
                 vm_start();
@@ -3842,6 +3808,8 @@ static void migration_instance_init(Object *obj)
     ms->state = MIGRATION_STATUS_NONE;
     ms->mbps = -1;
     ms->pages_per_second = -1;
+    /* Freshly started QEMU owns all the block devices */
+    migration_block_active_setup(true);
     qemu_sem_init(&ms->pause_sem, 0);
     qemu_mutex_init(&ms->error_mutex);
 
@@ -370,9 +370,6 @@ struct MigrationState {
     /* Flag set once the migration thread is running (and needs joining) */
     bool migration_thread_running;
 
-    /* Flag set once the migration thread called bdrv_inactivate_all */
-    bool block_inactive;
-
     /* Migration is waiting for guest to unplug device */
     QemuSemaphore wait_unplug_sem;
 
@@ -556,4 +553,7 @@ void migration_bitmap_sync_precopy(bool last_stage);
 /* migration/block-dirty-bitmap.c */
 void dirty_bitmap_mig_init(void);
 
+/* migration/block-active.c */
+void migration_block_active_setup(bool active);
+
 #endif
@@ -20,6 +20,7 @@
 #include "qemu/cutils.h"
 #include "qemu/error-report.h"
 #include "trace.h"
+#include "qemu-file.h"
 
 static MultiFDSendData *multifd_ram_send;
 
@@ -343,8 +344,53 @@ retry:
     return true;
 }
 
-int multifd_ram_flush_and_sync(void)
+/*
+ * We have two modes for multifd flushes:
+ *
+ * - Per-section mode: this is the legacy way to flush, it requires one
+ *   MULTIFD_FLAG_SYNC message for each RAM_SAVE_FLAG_EOS.
+ *
+ * - Per-round mode: this is the modern way to flush, it requires one
+ *   MULTIFD_FLAG_SYNC message only for each round of RAM scan.  Normally
+ *   it's paired with a new RAM_SAVE_FLAG_MULTIFD_FLUSH message in network
+ *   based migrations.
+ *
+ * One thing to mention is mapped-ram always use the modern way to sync.
+ */
+
+/* Do we need a per-section multifd flush (legacy way)? */
+bool multifd_ram_sync_per_section(void)
 {
+    if (!migrate_multifd()) {
+        return false;
+    }
+
+    if (migrate_mapped_ram()) {
+        return false;
+    }
+
+    return migrate_multifd_flush_after_each_section();
+}
+
+/* Do we need a per-round multifd flush (modern way)? */
+bool multifd_ram_sync_per_round(void)
+{
+    if (!migrate_multifd()) {
+        return false;
+    }
+
+    if (migrate_mapped_ram()) {
+        return true;
+    }
+
+    return !migrate_multifd_flush_after_each_section();
+}
+
+int multifd_ram_flush_and_sync(QEMUFile *f)
+{
+    MultiFDSyncReq req;
+    int ret;
+
     if (!migrate_multifd()) {
         return 0;
     }
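The two helpers introduced above collapse several scattered condition checks
into one decision. A standalone model of that decision table (plain C, not
QEMU code), exercised with the three interesting configurations:

    #include <stdbool.h>
    #include <stdio.h>

    struct cfg { bool multifd, mapped_ram, flush_after_each_section; };

    static bool sync_per_section(struct cfg c)
    {
        return c.multifd && !c.mapped_ram && c.flush_after_each_section;
    }

    static bool sync_per_round(struct cfg c)
    {
        return c.multifd && (c.mapped_ram || !c.flush_after_each_section);
    }

    int main(void)
    {
        struct cfg legacy = { true, false, true };   /* old wire format */
        struct cfg modern = { true, false, false };  /* new wire format */
        struct cfg mapped = { true, true,  true };   /* mapped-ram: always modern */

        printf("legacy: section=%d round=%d\n",
               sync_per_section(legacy), sync_per_round(legacy));
        printf("modern: section=%d round=%d\n",
               sync_per_section(modern), sync_per_round(modern));
        printf("mapped: section=%d round=%d\n",
               sync_per_section(mapped), sync_per_round(mapped));
        return 0;
    }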
@@ -356,12 +402,37 @@ int multifd_ram_flush_and_sync(void)
         }
     }
 
-    return multifd_send_sync_main();
+    /* File migrations only need to sync with threads */
+    req = migrate_mapped_ram() ? MULTIFD_SYNC_LOCAL : MULTIFD_SYNC_ALL;
+
+    ret = multifd_send_sync_main(req);
+    if (ret) {
+        return ret;
+    }
+
+    /* If we don't need to sync with remote at all, nothing else to do */
+    if (req == MULTIFD_SYNC_LOCAL) {
+        return 0;
+    }
+
+    /*
+     * Old QEMUs don't understand RAM_SAVE_FLAG_MULTIFD_FLUSH, it relies
+     * on RAM_SAVE_FLAG_EOS instead.
+     */
+    if (migrate_multifd_flush_after_each_section()) {
+        return 0;
+    }
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
+    qemu_fflush(f);
+
+    return 0;
 }
 
 bool multifd_send_prepare_common(MultiFDSendParams *p)
 {
     MultiFDPages_t *pages = &p->data->u.ram;
+    multifd_send_prepare_header(p);
     multifd_send_zero_page_detect(p);
 
     if (!pages->normal_num) {
@@ -369,8 +440,6 @@ bool multifd_send_prepare_common(MultiFDSendParams *p)
         return false;
     }
 
-    multifd_send_prepare_header(p);
-
     return true;
 }
 
@@ -373,6 +373,7 @@ static int qatzip_recv(MultiFDRecvParams *p, Error **errp)
     /* Copy each page to its appropriate location. */
     for (int i = 0; i < p->normal_num; i++) {
         memcpy(p->host + p->normal[i], q->out_buf + page_size * i, page_size);
+        ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
     }
     return 0;
 }
@@ -679,6 +679,7 @@ static int multifd_qpl_recv(MultiFDRecvParams *p, Error **errp)
         qpl->zlen[i] = be32_to_cpu(qpl->zlen[i]);
         assert(qpl->zlen[i] <= multifd_ram_page_size());
         zbuf_len += qpl->zlen[i];
+        ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
     }
 
     /* read compressed pages */
@@ -169,7 +169,7 @@ static int multifd_uadk_send_prepare(MultiFDSendParams *p, Error **errp)
         .src_len = page_size,
         .dst = buf,
         /* Set dst_len to double the src in case compressed out >= page_size */
-        .dst_len = p->page_size * 2,
+        .dst_len = page_size * 2,
     };
 
     if (uadk_data->handle) {
@@ -252,9 +252,8 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
     p->packet_num = be64_to_cpu(packet->packet_num);
     p->packets_recved++;
 
-    if (!(p->flags & MULTIFD_FLAG_SYNC)) {
-        ret = multifd_ram_unfill_packet(p, errp);
-    }
+    /* Always unfill, old QEMUs (<9.0) send data along with SYNC */
+    ret = multifd_ram_unfill_packet(p, errp);
 
     trace_multifd_recv_unfill(p->id, p->packet_num, p->flags,
                               p->next_packet_size);
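This small change carries the whole compat fix: a sender older than QEMU 9.0
may attach page data to the same packet that carries MULTIFD_FLAG_SYNC, so
the receiver must always unfill. A standalone model (plain C, not QEMU code):

    #include <stdbool.h>
    #include <stdio.h>

    #define FLAG_SYNC 0x1u

    struct packet { unsigned flags; int normal_num; };

    static void recv_packet(struct packet p)
    {
        /* Pre-fix receivers skipped unfill when FLAG_SYNC was set,
         * silently dropping pages sent by QEMU < 9.0. */
        bool has_data = p.normal_num > 0;   /* always check, even on SYNC */

        printf("sync=%d pages=%d -> %s\n",
               !!(p.flags & FLAG_SYNC), p.normal_num,
               has_data ? "apply pages" : "no data");
    }

    int main(void)
    {
        recv_packet((struct packet){ FLAG_SYNC, 0 });    /* modern sender */
        recv_packet((struct packet){ FLAG_SYNC, 16 });   /* old sender (<9.0) */
        return 0;
    }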
@@ -523,11 +522,13 @@ static int multifd_zero_copy_flush(QIOChannel *c)
     return ret;
 }
 
-int multifd_send_sync_main(void)
+int multifd_send_sync_main(MultiFDSyncReq req)
 {
     int i;
     bool flush_zero_copy;
 
+    assert(req != MULTIFD_SYNC_NONE);
+
     flush_zero_copy = migrate_zero_copy_send();
 
     for (i = 0; i < migrate_multifd_channels(); i++) {
@@ -543,8 +544,8 @@ int multifd_send_sync_main(void)
          * We should be the only user so far, so not possible to be set by
          * others concurrently.
          */
-        assert(qatomic_read(&p->pending_sync) == false);
-        qatomic_set(&p->pending_sync, true);
+        assert(qatomic_read(&p->pending_sync) == MULTIFD_SYNC_NONE);
+        qatomic_set(&p->pending_sync, req);
         qemu_sem_post(&p->sem);
     }
     for (i = 0; i < migrate_multifd_channels(); i++) {
@@ -635,14 +636,17 @@ static void *multifd_send_thread(void *opaque)
              */
             qatomic_store_release(&p->pending_job, false);
         } else {
+            MultiFDSyncReq req = qatomic_read(&p->pending_sync);
+
             /*
              * If not a normal job, must be a sync request.  Note that
              * pending_sync is a standalone flag (unlike pending_job), so
              * it doesn't require explicit memory barriers.
              */
-            assert(qatomic_read(&p->pending_sync));
+            assert(req != MULTIFD_SYNC_NONE);
 
-            if (use_packets) {
+            /* Only push the SYNC message if it involves a remote sync */
+            if (req == MULTIFD_SYNC_ALL) {
                 p->flags = MULTIFD_FLAG_SYNC;
                 multifd_send_fill_packet(p);
                 ret = qio_channel_write_all(p->c, (void *)p->packet,
@@ -654,7 +658,7 @@ static void *multifd_send_thread(void *opaque)
                     stat64_add(&mig_stats.multifd_bytes, p->packet_len);
                 }
 
-                qatomic_set(&p->pending_sync, false);
+                qatomic_set(&p->pending_sync, MULTIFD_SYNC_NONE);
                 qemu_sem_post(&p->sem_sync);
             }
         }
@@ -1151,9 +1155,13 @@ static void *multifd_recv_thread(void *opaque)
             flags = p->flags;
             /* recv methods don't know how to handle the SYNC flag */
             p->flags &= ~MULTIFD_FLAG_SYNC;
-            if (!(flags & MULTIFD_FLAG_SYNC)) {
-                has_data = p->normal_num || p->zero_num;
-            }
+
+            /*
+             * Even if it's a SYNC packet, this needs to be set
+             * because older QEMUs (<9.0) still send data along with
+             * the SYNC packet.
+             */
+            has_data = p->normal_num || p->zero_num;
             qemu_mutex_unlock(&p->mutex);
         } else {
             /*
@@ -19,6 +19,22 @@
 typedef struct MultiFDRecvData MultiFDRecvData;
 typedef struct MultiFDSendData MultiFDSendData;
 
+typedef enum {
+    /* No sync request */
+    MULTIFD_SYNC_NONE = 0,
+    /* Sync locally on the sender threads without pushing messages */
+    MULTIFD_SYNC_LOCAL,
+    /*
+     * Sync not only on the sender threads, but also push MULTIFD_FLAG_SYNC
+     * message to the wire for each iochannel (which is for a remote sync).
+     *
+     * When remote sync is used, need to be paired with a follow up
+     * RAM_SAVE_FLAG_EOS / RAM_SAVE_FLAG_MULTIFD_FLUSH message on the main
+     * channel.
+     */
+    MULTIFD_SYNC_ALL,
+} MultiFDSyncReq;
+
 bool multifd_send_setup(void);
 void multifd_send_shutdown(void);
 void multifd_send_channel_created(void);
@@ -28,7 +44,7 @@ void multifd_recv_shutdown(void);
 bool multifd_recv_all_channels_created(void);
 void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
 void multifd_recv_sync_main(void);
-int multifd_send_sync_main(void);
+int multifd_send_sync_main(MultiFDSyncReq req);
 bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
 bool multifd_recv(void);
 MultiFDRecvData *multifd_get_recv_data(void);
@@ -143,7 +159,7 @@ typedef struct {
     /* multifd flags for each packet */
     uint32_t flags;
     /*
-     * The sender thread has work to do if either of below boolean is set.
+     * The sender thread has work to do if either of below field is set.
      *
      * @pending_job:  a job is pending
     * @pending_sync: a sync request is pending
@@ -152,7 +168,8 @@ typedef struct {
      * cleared by the multifd sender threads.
      */
     bool pending_job;
-    bool pending_sync;
+    MultiFDSyncReq pending_sync;
+
     MultiFDSendData *data;
 
     /* thread local variables. No locking required */
@@ -337,7 +354,9 @@ static inline uint32_t multifd_ram_page_count(void)
 
 void multifd_ram_save_setup(void);
 void multifd_ram_save_cleanup(void);
-int multifd_ram_flush_and_sync(void);
+int multifd_ram_flush_and_sync(QEMUFile *f);
+bool multifd_ram_sync_per_round(void);
+bool multifd_ram_sync_per_section(void);
 size_t multifd_ram_payload_size(void);
 void multifd_ram_fill_packet(MultiFDSendParams *p);
 int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);
@@ -71,27 +71,6 @@
 /***********************************************************/
 /* ram save/restore */
 
-/*
- * RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
- * worked for pages that were filled with the same char.  We switched
- * it to only search for the zero value.  And to avoid confusion with
- * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
- *
- * RAM_SAVE_FLAG_FULL was obsoleted in 2009.
- *
- * RAM_SAVE_FLAG_COMPRESS_PAGE (0x100) was removed in QEMU 9.1.
- */
-#define RAM_SAVE_FLAG_FULL     0x01
-#define RAM_SAVE_FLAG_ZERO     0x02
-#define RAM_SAVE_FLAG_MEM_SIZE 0x04
-#define RAM_SAVE_FLAG_PAGE     0x08
-#define RAM_SAVE_FLAG_EOS      0x10
-#define RAM_SAVE_FLAG_CONTINUE 0x20
-#define RAM_SAVE_FLAG_XBZRLE   0x40
-/* 0x80 is reserved in rdma.h for RAM_SAVE_FLAG_HOOK */
-#define RAM_SAVE_FLAG_MULTIFD_FLUSH    0x200
-/* We can't use any flag that is bigger than 0x200 */
-
 /*
  * mapped-ram migration supports O_DIRECT, so we need to make sure the
  * userspace buffer, the IO operation size and the file offset are
@@ -1323,19 +1302,12 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss)
         pss->page = 0;
         pss->block = QLIST_NEXT_RCU(pss->block, next);
         if (!pss->block) {
-            if (migrate_multifd() &&
-                (!migrate_multifd_flush_after_each_section() ||
-                 migrate_mapped_ram())) {
+            if (multifd_ram_sync_per_round()) {
                 QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel;
-                int ret = multifd_ram_flush_and_sync();
+                int ret = multifd_ram_flush_and_sync(f);
                 if (ret < 0) {
                     return ret;
                 }
-
-                if (!migrate_mapped_ram()) {
-                    qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
-                    qemu_fflush(f);
-                }
             }
 
             /* Hit the end of the list */
@@ -3064,19 +3036,39 @@ static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp)
         migration_ops->ram_save_target_page = ram_save_target_page_legacy;
     }
 
+    /*
+     * This operation is unfortunate..
+     *
+     * For legacy QEMUs using per-section sync
+     * =======================================
+     *
+     * This must exist because the EOS below requires the SYNC messages
+     * per-channel to work.
+     *
+     * For modern QEMUs using per-round sync
+     * =====================================
+     *
+     * Logically such sync is not needed, and recv threads should not run
+     * until setup ready (using things like channels_ready on src).  Then
+     * we should be all fine.
+     *
+     * However even if we add channels_ready to recv side in new QEMUs, old
+     * QEMU won't have them so this sync will still be needed to make sure
+     * multifd recv threads won't start processing guest pages early before
+     * ram_load_setup() is properly done.
+     *
+     * Let's stick with this.  Fortunately the overhead is low to sync
+     * during setup because the VM is running, so at least it's not
+     * accounted as part of downtime.
+     */
     bql_unlock();
-    ret = multifd_ram_flush_and_sync();
+    ret = multifd_ram_flush_and_sync(f);
     bql_lock();
     if (ret < 0) {
        error_setg(errp, "%s: multifd synchronization failed", __func__);
        return ret;
    }
 
-    if (migrate_multifd() && !migrate_multifd_flush_after_each_section()
-        && !migrate_mapped_ram()) {
-        qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
-    }
-
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
     ret = qemu_fflush(f);
     if (ret < 0) {
@@ -3209,9 +3201,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 
 out:
     if (ret >= 0 && migration_is_running()) {
-        if (migrate_multifd() && migrate_multifd_flush_after_each_section() &&
-            !migrate_mapped_ram()) {
-            ret = multifd_ram_flush_and_sync();
+        if (multifd_ram_sync_per_section()) {
+            ret = multifd_ram_flush_and_sync(f);
             if (ret < 0) {
                 return ret;
             }
@@ -3283,9 +3274,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
         }
     }
 
-    ret = multifd_ram_flush_and_sync();
-    if (ret < 0) {
-        return ret;
+    if (multifd_ram_sync_per_section()) {
+        /*
+         * Only the old dest QEMU will need this sync, because each EOS
+         * will require one SYNC message on each channel.
+         */
+        ret = multifd_ram_flush_and_sync(f);
+        if (ret < 0) {
+            return ret;
+        }
     }
 
     if (migrate_mapped_ram()) {
@@ -3796,15 +3793,7 @@ int ram_load_postcopy(QEMUFile *f, int channel)
                                      TARGET_PAGE_SIZE);
             }
             break;
-        case RAM_SAVE_FLAG_MULTIFD_FLUSH:
-            multifd_recv_sync_main();
-            break;
         case RAM_SAVE_FLAG_EOS:
-            /* normal exit */
-            if (migrate_multifd() &&
-                migrate_multifd_flush_after_each_section()) {
-                multifd_recv_sync_main();
-            }
             break;
         default:
             error_report("Unknown combination of migration flags: 0x%x"
@@ -33,6 +33,34 @@
 #include "exec/cpu-common.h"
 #include "io/channel.h"
 
+/*
+ * RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
+ * worked for pages that were filled with the same char.  We switched
+ * it to only search for the zero value.  And to avoid confusion with
+ * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
+ *
+ * RAM_SAVE_FLAG_FULL (0x01) was obsoleted in 2009.
+ *
+ * RAM_SAVE_FLAG_COMPRESS_PAGE (0x100) was removed in QEMU 9.1.
+ *
+ * RAM_SAVE_FLAG_HOOK is only used in RDMA.  Whenever this is found in the
+ * data stream, the flags will be passed to rdma functions in the
+ * incoming-migration side.
+ *
+ * We can't use any flag that is bigger than 0x200, because the flags are
+ * always assumed to be encoded in a ramblock address offset, which is
+ * multiple of PAGE_SIZE.  Here it means QEMU supports migration with any
+ * architecture that has PAGE_SIZE>=1K (0x400).
+ */
+#define RAM_SAVE_FLAG_ZERO                    0x002
+#define RAM_SAVE_FLAG_MEM_SIZE                0x004
+#define RAM_SAVE_FLAG_PAGE                    0x008
+#define RAM_SAVE_FLAG_EOS                     0x010
+#define RAM_SAVE_FLAG_CONTINUE                0x020
+#define RAM_SAVE_FLAG_XBZRLE                  0x040
+#define RAM_SAVE_FLAG_HOOK                    0x080
+#define RAM_SAVE_FLAG_MULTIFD_FLUSH           0x200
+
 extern XBZRLECacheStats xbzrle_counters;
 
 /* Should be holding either ram_list.mutex, or the RCU lock. */
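The constraint stated in the comment above can be made concrete with a
small model (plain C, not QEMU code): the flags travel in the low bits of
a page-aligned stream offset, so no flag may reach the smallest supported
page size (0x400):

    #include <assert.h>
    #include <inttypes.h>
    #include <stdio.h>

    #define RAM_PAGE_SIZE 0x400u      /* smallest supported target page: 1K */
    #define FLAG_MASK     (RAM_PAGE_SIZE - 1)

    static uint64_t encode(uint64_t page_addr, uint64_t flags)
    {
        assert((page_addr & FLAG_MASK) == 0);        /* offsets page aligned */
        assert((flags & ~(uint64_t)FLAG_MASK) == 0); /* flags fit below 0x400 */
        return page_addr | flags;
    }

    int main(void)
    {
        uint64_t word = encode(0x7f0000, 0x200);     /* 0x200 = MULTIFD_FLUSH */
        printf("addr=0x%" PRIx64 " flags=0x%" PRIx64 "\n",
               word & ~(uint64_t)FLAG_MASK, word & (uint64_t)FLAG_MASK);
        return 0;
    }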
@@ -33,13 +33,6 @@ void rdma_start_incoming_migration(InetSocketAddress *host_port, Error **errp);
 #define RAM_CONTROL_ROUND     1
 #define RAM_CONTROL_FINISH    3
 
-/*
- * Whenever this is found in the data stream, the flags
- * will be passed to rdma functions in the incoming-migration
- * side.
- */
-#define RAM_SAVE_FLAG_HOOK     0x80
-
 #define RAM_SAVE_CONTROL_NOT_SUPP -1000
 #define RAM_SAVE_CONTROL_DELAYED  -2000
 
@@ -1547,15 +1547,16 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
     }
 
     if (inactivate_disks) {
-        /* Inactivate before sending QEMU_VM_EOF so that the
-         * bdrv_activate_all() on the other end won't fail. */
-        ret = bdrv_inactivate_all();
-        if (ret) {
-            error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
-                       __func__, ret);
+        /*
+         * Inactivate before sending QEMU_VM_EOF so that the
+         * bdrv_activate_all() on the other end won't fail.
+         */
+        if (!migration_block_inactivate()) {
+            error_setg(&local_err, "%s: bdrv_inactivate_all() failed",
+                       __func__);
             migrate_set_error(ms, local_err);
             error_report_err(local_err);
-            qemu_file_set_error(f, ret);
+            qemu_file_set_error(f, -EFAULT);
             return ret;
         }
     }
@@ -2121,7 +2122,6 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
 
 static void loadvm_postcopy_handle_run_bh(void *opaque)
 {
-    Error *local_err = NULL;
     MigrationIncomingState *mis = opaque;
 
     trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter");
@@ -2137,22 +2137,20 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
 
     trace_vmstate_downtime_checkpoint("dst-postcopy-bh-announced");
 
-    /* Make sure all file formats throw away their mutable metadata.
-     * If we get an error here, just don't restart the VM yet. */
-    bdrv_activate_all(&local_err);
-    if (local_err) {
-        error_report_err(local_err);
-        local_err = NULL;
-        autostart = false;
-    }
-
-    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");
-
     dirty_bitmap_mig_before_vm_start();
 
     if (autostart) {
-        /* Hold onto your hats, starting the CPU */
-        vm_start();
+        /*
+         * Make sure all file formats throw away their mutable metadata.
+         * If we get an error here, just don't restart the VM yet.
+         */
+        bool success = migration_block_activate(NULL);
+
+        trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");
+
+        if (success) {
+            vm_start();
+        }
     } else {
         /* leave it paused and let management decide when to start the CPU */
         runstate_set(RUN_STATE_PAUSED);
@@ -3192,11 +3190,7 @@ void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
      * side of the migration take control of the images.
      */
     if (live && !saved_vm_running) {
-        ret = bdrv_inactivate_all();
-        if (ret) {
-            error_setg(errp, "%s: bdrv_inactivate_all() failed (%d)",
-                       __func__, ret);
-        }
+        migration_block_inactivate();
     }
 }
 
@@ -383,3 +383,6 @@ migration_pagecache_insert(void) "Error allocating page"
 # cpu-throttle.c
 cpu_throttle_set(int new_throttle_pct) "set guest CPU throttled by %d%%"
 cpu_throttle_dirty_sync(void) ""
+
+# block-active.c
+migration_block_activation(const char *name) "%s"
@@ -338,7 +338,7 @@ static int put_nullptr(QEMUFile *f, void *pv, size_t size,
 }
 
 const VMStateInfo vmstate_info_nullptr = {
-    .name = "uint64",
+    .name = "nullptr",
     .get = get_nullptr,
     .put = put_nullptr,
 };
@@ -51,6 +51,36 @@ vmstate_field_exists(const VMStateDescription *vmsd, const VMStateField *field,
     return result;
 }
 
+/*
+ * Create a fake nullptr field when there's a NULL pointer detected in the
+ * array of a VMS_ARRAY_OF_POINTER VMSD field.  It's needed because we
+ * can't dereference the NULL pointer.
+ */
+static const VMStateField *
+vmsd_create_fake_nullptr_field(const VMStateField *field)
+{
+    VMStateField *fake = g_new0(VMStateField, 1);
+
+    /* It can only happen on an array of pointers! */
+    assert(field->flags & VMS_ARRAY_OF_POINTER);
+
+    /* Some of fake's properties should match the original's */
+    fake->name = field->name;
+    fake->version_id = field->version_id;
+
+    /* Do not need "field_exists" check as it always exists (which is null) */
+    fake->field_exists = NULL;
+
+    /* See vmstate_info_nullptr - use 1 byte to represent nullptr */
+    fake->size = 1;
+    fake->info = &vmstate_info_nullptr;
+    fake->flags = VMS_SINGLE;
+
+    /* All the rest fields shouldn't matter.. */
+
+    return (const VMStateField *)fake;
+}
+
 static int vmstate_n_elems(void *opaque, const VMStateField *field)
 {
     int n_elems = 1;
@@ -143,23 +173,39 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
         }
         for (i = 0; i < n_elems; i++) {
             void *curr_elem = first_elem + size * i;
+            const VMStateField *inner_field;
 
             if (field->flags & VMS_ARRAY_OF_POINTER) {
                 curr_elem = *(void **)curr_elem;
             }
+
             if (!curr_elem && size) {
-                /* if null pointer check placeholder and do not follow */
-                assert(field->flags & VMS_ARRAY_OF_POINTER);
-                ret = vmstate_info_nullptr.get(f, curr_elem, size, NULL);
-            } else if (field->flags & VMS_STRUCT) {
-                ret = vmstate_load_state(f, field->vmsd, curr_elem,
-                                         field->vmsd->version_id);
-            } else if (field->flags & VMS_VSTRUCT) {
-                ret = vmstate_load_state(f, field->vmsd, curr_elem,
-                                         field->struct_version_id);
+                /*
+                 * If null pointer found (which should only happen in
+                 * an array of pointers), use null placeholder and do
+                 * not follow.
+                 */
+                inner_field = vmsd_create_fake_nullptr_field(field);
             } else {
-                ret = field->info->get(f, curr_elem, size, field);
+                inner_field = field;
             }
 
+            if (inner_field->flags & VMS_STRUCT) {
+                ret = vmstate_load_state(f, inner_field->vmsd, curr_elem,
+                                         inner_field->vmsd->version_id);
+            } else if (inner_field->flags & VMS_VSTRUCT) {
+                ret = vmstate_load_state(f, inner_field->vmsd, curr_elem,
+                                           inner_field->struct_version_id);
+            } else {
+                ret = inner_field->info->get(f, curr_elem, size,
+                                             inner_field);
+            }
+
+            /* If we used a fake temp field.. free it now */
+            if (inner_field != field) {
+                g_clear_pointer((gpointer *)&inner_field, g_free);
+            }
+
             if (ret >= 0) {
                 ret = qemu_file_get_error(f);
             }
@@ -311,7 +357,7 @@ static void vmsd_desc_field_start(const VMStateDescription *vmsd,
 
 static void vmsd_desc_field_end(const VMStateDescription *vmsd,
                                 JSONWriter *vmdesc,
-                                const VMStateField *field, size_t size, int i)
+                                const VMStateField *field, size_t size)
 {
     if (!vmdesc) {
         return;
@@ -379,37 +425,89 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
             int size = vmstate_size(opaque, field);
             uint64_t old_offset, written_bytes;
             JSONWriter *vmdesc_loop = vmdesc;
+            bool is_prev_null = false;
 
             trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems);
             if (field->flags & VMS_POINTER) {
                 first_elem = *(void **)first_elem;
                 assert(first_elem || !n_elems || !size);
             }
             for (i = 0; i < n_elems; i++) {
                 void *curr_elem = first_elem + size * i;
+                const VMStateField *inner_field;
+                bool is_null;
+                int max_elems = n_elems - i;
 
-                vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems);
                 old_offset = qemu_file_transferred(f);
                 if (field->flags & VMS_ARRAY_OF_POINTER) {
                     assert(curr_elem);
                     curr_elem = *(void **)curr_elem;
                 }
+
                 if (!curr_elem && size) {
-                    /* if null pointer write placeholder and do not follow */
-                    assert(field->flags & VMS_ARRAY_OF_POINTER);
-                    ret = vmstate_info_nullptr.put(f, curr_elem, size, NULL,
-                                                   NULL);
-                } else if (field->flags & VMS_STRUCT) {
-                    ret = vmstate_save_state(f, field->vmsd, curr_elem,
-                                             vmdesc_loop);
-                } else if (field->flags & VMS_VSTRUCT) {
-                    ret = vmstate_save_state_v(f, field->vmsd, curr_elem,
-                                               vmdesc_loop,
-                                               field->struct_version_id, errp);
+                    /*
+                     * If null pointer found (which should only happen in
+                     * an array of pointers), use null placeholder and do
+                     * not follow.
+                     */
+                    inner_field = vmsd_create_fake_nullptr_field(field);
+                    is_null = true;
                 } else {
-                    ret = field->info->put(f, curr_elem, size, field,
-                                           vmdesc_loop);
+                    inner_field = field;
+                    is_null = false;
                 }
+
+                /*
+                 * Due to the fake nullptr handling above, if there's mixed
+                 * null/non-null data, it doesn't make sense to emit a
+                 * compressed array representation spanning the entire array
+                 * because the field types will be different (e.g. struct
+                 * vs. nullptr). Search ahead for the next null/non-null element
+                 * and start a new compressed array if found.
+                 */
+                if (field->flags & VMS_ARRAY_OF_POINTER &&
+                    is_null != is_prev_null) {
+
+                    is_prev_null = is_null;
+                    vmdesc_loop = vmdesc;
+
+                    for (int j = i + 1; j < n_elems; j++) {
+                        void *elem = *(void **)(first_elem + size * j);
+                        bool elem_is_null = !elem && size;
+
+                        if (is_null != elem_is_null) {
+                            max_elems = j - i;
+                            break;
+                        }
+                    }
+                }
+
+                vmsd_desc_field_start(vmsd, vmdesc_loop, inner_field,
+                                      i, max_elems);
+
+                if (inner_field->flags & VMS_STRUCT) {
+                    ret = vmstate_save_state(f, inner_field->vmsd,
+                                             curr_elem, vmdesc_loop);
+                } else if (inner_field->flags & VMS_VSTRUCT) {
+                    ret = vmstate_save_state_v(f, inner_field->vmsd,
+                                               curr_elem, vmdesc_loop,
+                                               inner_field->struct_version_id,
+                                               errp);
+                } else {
+                    ret = inner_field->info->put(f, curr_elem, size,
+                                                 inner_field, vmdesc_loop);
+                }
+
+                written_bytes = qemu_file_transferred(f) - old_offset;
+                vmsd_desc_field_end(vmsd, vmdesc_loop, inner_field,
+                                    written_bytes);
+
+                /* If we used a fake temp field.. free it now */
+                if (inner_field != field) {
+                    g_clear_pointer((gpointer *)&inner_field, g_free);
+                }
+
                 if (ret) {
                     error_setg(errp, "Save of field %s/%s failed",
                                vmsd->name, field->name);
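The look-ahead block above is the heart of the arrays-of-pointers fix: once NULL elements are written through a fake nullptr field, a VMS_ARRAY_OF_POINTER array with mixed NULL/non-NULL elements can no longer be described by a single compressed JSON entry, so the save loop emits one vmdesc entry per run of same-kind elements. A minimal Python sketch of that segmentation idea (illustrative only; segment_pointer_array is not a QEMU function):

    def segment_pointer_array(elems):
        """Split a pointer array into (start, length, is_null) runs."""
        segments = []
        i = 0
        while i < len(elems):
            is_null = elems[i] is None
            j = i + 1
            while j < len(elems) and (elems[j] is None) == is_null:
                j += 1
            segments.append((i, j - i, is_null))
            i = j
        return segments

    # [ptr, NULL, NULL, ptr] becomes three vmdesc entries:
    # [(0, 1, False), (1, 2, True), (3, 1, False)]
    print(segment_pointer_array(["p0", None, None, "p3"]))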
@@ -419,9 +517,6 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
                     return ret;
                 }
 
-                written_bytes = qemu_file_transferred(f) - old_offset;
-                vmsd_desc_field_end(vmsd, vmdesc_loop, field, written_bytes, i);
-
                 /* Compressed arrays only care about the first element */
                 if (vmdesc_loop && vmsd_can_compress(field)) {
                     vmdesc_loop = NULL;
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -31,6 +31,7 @@
 #include "qapi/type-helpers.h"
 #include "hw/mem/memory-device.h"
 #include "hw/intc/intc.h"
+#include "migration/misc.h"
 
 NameInfo *qmp_query_name(Error **errp)
 {
@@ -96,21 +97,18 @@ void qmp_cont(Error **errp)
         }
     }
 
-    /* Continuing after completed migration. Images have been inactivated to
-     * allow the destination to take control. Need to get control back now.
-     *
-     * If there are no inactive block nodes (e.g. because the VM was just
-     * paused rather than completing a migration), bdrv_inactivate_all() simply
-     * doesn't do anything. */
-    bdrv_activate_all(&local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
     if (runstate_check(RUN_STATE_INMIGRATE)) {
         autostart = 1;
     } else {
+        /*
+         * Continuing after completed migration. Images have been
+         * inactivated to allow the destination to take control. Need to
+         * get control back now.
+         */
+        if (!migration_block_activate(&local_err)) {
+            error_propagate(errp, local_err);
+            return;
+        }
         vm_start();
     }
 }
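For context: qmp_cont previously called bdrv_activate_all() unconditionally, which could fire while a migration was merely paused and trip the late-block-activate assertions. The rewrite routes activation through migration_block_activate(), which keeps one authoritative record of whether the images are active, so repeated or mistimed "cont" commands become harmless. A rough Python model of that idempotent-activation idea (toy code, not QEMU's actual API):

    class BlockActivation:
        """Toy stand-in for centralized block-activation tracking."""
        def __init__(self):
            self.active = True      # disks start out active on a fresh VM

        def deactivate(self):
            # Migration completion hands disk ownership to the destination.
            self.active = False

        def activate(self):
            # Idempotent: a second "cont", or a "cont" after a merely
            # paused migration, finds the disks already active and does
            # nothing instead of re-activating (or asserting).
            if not self.active:
                self.active = True
            return True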
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -65,6 +65,9 @@ class MigrationFile(object):
     def tell(self):
         return self.file.tell()
 
+    def seek(self, a, b):
+        return self.file.seek(a, b)
+
     # The VMSD description is at the end of the file, after EOF. Look for
     # the last NULL byte, then for the beginning brace of JSON.
     def read_migration_debug_json(self):
@@ -272,11 +275,24 @@ class S390StorageAttributes(object):
         self.section_key = section_key
 
     def read(self):
+        pos = 0
         while True:
             addr_flags = self.file.read64()
             flags = addr_flags & 0xfff
-            if (flags & (self.STATTR_FLAG_DONE | self.STATTR_FLAG_EOS)):
+
+            if flags & self.STATTR_FLAG_DONE:
+                pos = self.file.tell()
+                continue
+            elif flags & self.STATTR_FLAG_EOS:
                 return
+            else:
+                # No EOS came after DONE, that's OK, but rewind the
+                # stream because this is not our data.
+                if pos:
+                    self.file.seek(pos, os.SEEK_SET)
+                    return
+                raise Exception("Unknown flags %x", flags)
+
             if (flags & self.STATTR_FLAG_ERROR):
                 raise Exception("Error in migration stream")
             count = self.file.read64()
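The rewritten reader implements a simple recovery rule: remember the stream position after every DONE record, and if a record with unexpected flags appears before EOS, rewind to that position and return, because the remaining bytes belong to the next section of the stream. The same rule as a standalone sketch (the flag values and 8-byte record layout here are placeholders, not the real s390 encoding):

    import os

    FLAG_DONE = 0x1   # placeholder flag values, not the s390 constants
    FLAG_EOS = 0x2

    def read_own_records(f):
        pos = 0
        while True:
            flags = int.from_bytes(f.read(8), "big") & 0xfff
            if flags & FLAG_DONE:
                pos = f.tell()      # remember the end of our data
                continue
            if flags & FLAG_EOS:
                return
            if pos:
                # DONE was seen but no EOS followed: these bytes are not
                # ours, so rewind and let the next parser consume them.
                f.seek(pos, os.SEEK_SET)
                return
            raise Exception("Unknown flags %x" % flags)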
@@ -401,6 +417,28 @@ class VMSDFieldIntLE(VMSDFieldInt):
         super(VMSDFieldIntLE, self).__init__(desc, file)
         self.dtype = '<i%d' % self.size
 
+class VMSDFieldNull(VMSDFieldGeneric):
+    NULL_PTR_MARKER = b'0'
+
+    def __init__(self, desc, file):
+        super(VMSDFieldNull, self).__init__(desc, file)
+
+    def __repr__(self):
+        # A NULL pointer is encoded in the stream as a '0' to
+        # disambiguate from a mere 0x0 value and avoid consumers
+        # trying to follow the NULL pointer. Displaying '0', 0x30 or
+        # 0x0 when analyzing the JSON debug stream could become
+        # confusing, so use an explicit term instead.
+        return "nullptr"
+
+    def __str__(self):
+        return self.__repr__()
+
+    def read(self):
+        super(VMSDFieldNull, self).read()
+        assert(self.data == self.NULL_PTR_MARKER)
+        return self.data
+
 class VMSDFieldBool(VMSDFieldGeneric):
     def __init__(self, desc, file):
         super(VMSDFieldBool, self).__init__(desc, file)
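The class above gives the b'0' placeholder its own reader type. The convention is small enough to check in isolation; a minimal sketch under the same assumption that a null pointer travels as the single byte b'0' (decode_pointer_field is an illustrative helper, not part of the script):

    NULL_PTR_MARKER = b'0'

    def decode_pointer_field(raw):
        """Return None for the null placeholder, the raw bytes otherwise."""
        if raw == NULL_PTR_MARKER:
            return None
        return raw

    assert decode_pointer_field(b'0') is None
    assert decode_pointer_field(b'\x00' * 8) == b'\x00' * 8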
@@ -429,6 +467,9 @@ class VMSDFieldStruct(VMSDFieldGeneric):
         super(VMSDFieldStruct, self).__init__(desc, file)
         self.data = collections.OrderedDict()
 
+        if 'fields' not in self.desc['struct']:
+            raise Exception("No fields in struct. VMSD:\n%s" % self.desc)
+
         # When we see compressed array elements, unfold them here
         new_fields = []
         for field in self.desc['struct']['fields']:
@@ -461,15 +502,25 @@ class VMSDFieldStruct(VMSDFieldGeneric):
             field['data'] = reader(field, self.file)
             field['data'].read()
 
-            if 'index' in field:
-                if field['name'] not in self.data:
-                    self.data[field['name']] = []
-                a = self.data[field['name']]
-                if len(a) != int(field['index']):
-                    raise Exception("internal index of data field unmatched (%d/%d)" % (len(a), int(field['index'])))
-                a.append(field['data'])
+            fname = field['name']
+            fdata = field['data']
+
+            # The field could be:
+            # i) a single data entry, e.g. uint64
+            # ii) an array, indicated by it containing the 'index' key
+            #
+            # However, the overall data after parsing the whole
+            # stream, could be a mix of arrays and single data fields,
+            # all sharing the same field name due to how QEMU breaks
+            # up arrays with NULL pointers into multiple compressed
+            # array segments.
+            if fname not in self.data:
+                self.data[fname] = fdata
+            elif type(self.data[fname]) == list:
+                self.data[fname].append(fdata)
             else:
-                self.data[field['name']] = field['data']
+                tmp = self.data[fname]
+                self.data[fname] = [tmp, fdata]
 
         if 'subsections' in self.desc['struct']:
             for subsection in self.desc['struct']['subsections']:
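The merging rule above is worth stating on its own: the first occurrence of a field name is stored as-is, and any later occurrence promotes the entry to a list. That is what lets the several compressed-array segments of one array, split around NULL pointers on the save side, reassemble under a single field name. A standalone sketch of the same logic (merge_field is an illustrative helper, not part of the script):

    def merge_field(data, fname, fdata):
        if fname not in data:
            data[fname] = fdata
        elif isinstance(data[fname], list):
            data[fname].append(fdata)
        else:
            data[fname] = [data[fname], fdata]

    d = {}
    for name, value in [("a", 1), ("a", 2), ("a", 3), ("b", 9)]:
        merge_field(d, name, value)
    assert d == {"a": [1, 2, 3], "b": 9}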
@@ -477,6 +528,10 @@ class VMSDFieldStruct(VMSDFieldGeneric):
                     raise Exception("Subsection %s not found at offset %x" % ( subsection['vmsd_name'], self.file.tell()))
                 name = self.file.readstr()
                 version_id = self.file.read32()
+
+                if not subsection:
+                    raise Exception("Empty description for subsection: %s" % name)
+
                 self.data[name] = VMSDSection(self.file, version_id, subsection, (name, 0))
                 self.data[name].read()
 
@@ -535,6 +590,7 @@ vmsd_field_readers = {
     "bitmap" : VMSDFieldGeneric,
     "struct" : VMSDFieldStruct,
     "capability": VMSDFieldCap,
+    "nullptr": VMSDFieldNull,
     "unknown" : VMSDFieldGeneric,
 }
 
@@ -574,10 +630,13 @@ class MigrationDump(object):
         }
         self.filename = filename
         self.vmsd_desc = None
+        self.vmsd_json = ""
 
-    def read(self, desc_only = False, dump_memory = False, write_memory = False):
+    def read(self, desc_only = False, dump_memory = False,
+             write_memory = False):
         # Read in the whole file
         file = MigrationFile(self.filename)
+        self.vmsd_json = file.read_migration_debug_json()
 
         # File magic
         data = file.read32()
@@ -635,9 +694,11 @@ class MigrationDump(object):
         file.close()
 
     def load_vmsd_json(self, file):
-        vmsd_json = file.read_migration_debug_json()
-        self.vmsd_desc = json.loads(vmsd_json, object_pairs_hook=collections.OrderedDict)
+        self.vmsd_desc = json.loads(self.vmsd_json,
+                                    object_pairs_hook=collections.OrderedDict)
         for device in self.vmsd_desc['devices']:
+            if 'fields' not in device:
+                raise Exception("vmstate for device %s has no fields" % device['name'])
             key = (device['name'], device['instance_id'])
             value = ( VMSDSection, device )
             self.section_classes[key] = value
@@ -666,31 +727,34 @@ args = parser.parse_args()
 
 jsonenc = JSONEncoder(indent=4, separators=(',', ': '))
 
-if args.extract:
-    dump = MigrationDump(args.file)
-
-    dump.read(desc_only = True)
-    print("desc.json")
-    f = open("desc.json", "w")
-    f.truncate()
-    f.write(jsonenc.encode(dump.vmsd_desc))
-    f.close()
-
-    dump.read(write_memory = True)
-    dict = dump.getDict()
-    print("state.json")
-    f = open("state.json", "w")
-    f.truncate()
-    f.write(jsonenc.encode(dict))
-    f.close()
-elif args.dump == "state":
-    dump = MigrationDump(args.file)
-    dump.read(dump_memory = args.memory)
-    dict = dump.getDict()
-    print(jsonenc.encode(dict))
-elif args.dump == "desc":
-    dump = MigrationDump(args.file)
-    dump.read(desc_only = True)
-    print(jsonenc.encode(dump.vmsd_desc))
-else:
+if not any([args.extract, args.dump == "state", args.dump == "desc"]):
     raise Exception("Please specify either -x, -d state or -d desc")
 
+try:
+    dump = MigrationDump(args.file)
+
+    if args.extract:
+        dump.read(desc_only = True)
+
+        print("desc.json")
+        f = open("desc.json", "w")
+        f.truncate()
+        f.write(jsonenc.encode(dump.vmsd_desc))
+        f.close()
+
+        dump.read(write_memory = True)
+        dict = dump.getDict()
+        print("state.json")
+        f = open("state.json", "w")
+        f.truncate()
+        f.write(jsonenc.encode(dict))
+        f.close()
+    elif args.dump == "state":
+        dump.read(dump_memory = args.memory)
+        dict = dump.getDict()
+        print(jsonenc.encode(dict))
+    elif args.dump == "desc":
+        dump.read(desc_only = True)
+        print(jsonenc.encode(dump.vmsd_desc))
+except Exception:
+    raise Exception("Full JSON dump:\n%s", dump.vmsd_json)
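The reworked entry point validates the arguments up front, then wraps all parsing in a single try/except so that a malformed stream still surfaces the raw JSON device description for debugging. A condensed sketch of that same pattern (read_debug_json is a hypothetical helper standing in for the script's file handling):

    import json

    def analyze(path):
        vmsd_json = ""
        try:
            with open(path, "rb") as f:
                vmsd_json = read_debug_json(f)   # hypothetical helper
                return json.loads(vmsd_json)
        except Exception as exc:
            # Surface the raw JSON alongside the original failure.
            raise Exception("Full JSON dump:\n%s" % vmsd_json) from exc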