From 6436db5a609c16c57e50e525e45e2b88cf925fff Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 20 Oct 2023 16:55:54 +0200 Subject: [PATCH 01/40] hw/ipmi: Don't call vmstate_register() from instance_init() functions instance_init() can be called multiple times, e.g. during introspection of the device. We should not install the vmstate handlers here. Do it in the realize() function instead. Signed-off-by: Thomas Huth Reviewed-by: Juan Quintela Acked-by: Corey Minyard Signed-off-by: Juan Quintela Message-ID: <20231020145554.662751-1-thuth@redhat.com> --- hw/ipmi/ipmi_bmc_extern.c | 29 ++++++++++++----------- hw/ipmi/isa_ipmi_bt.c | 34 +++++++++++++------------- hw/ipmi/isa_ipmi_kcs.c | 50 +++++++++++++++++++-------------------- 3 files changed, 57 insertions(+), 56 deletions(-) diff --git a/hw/ipmi/ipmi_bmc_extern.c b/hw/ipmi/ipmi_bmc_extern.c index e232d35ba2..2117dad35a 100644 --- a/hw/ipmi/ipmi_bmc_extern.c +++ b/hw/ipmi/ipmi_bmc_extern.c @@ -453,19 +453,6 @@ static void ipmi_bmc_extern_handle_reset(IPMIBmc *b) continue_send(ibe); } -static void ipmi_bmc_extern_realize(DeviceState *dev, Error **errp) -{ - IPMIBmcExtern *ibe = IPMI_BMC_EXTERN(dev); - - if (!qemu_chr_fe_backend_connected(&ibe->chr)) { - error_setg(errp, "IPMI external bmc requires chardev attribute"); - return; - } - - qemu_chr_fe_set_handlers(&ibe->chr, can_receive, receive, - chr_event, NULL, ibe, NULL, true); -} - static int ipmi_bmc_extern_post_migrate(void *opaque, int version_id) { IPMIBmcExtern *ibe = opaque; @@ -499,12 +486,26 @@ static const VMStateDescription vmstate_ipmi_bmc_extern = { } }; +static void ipmi_bmc_extern_realize(DeviceState *dev, Error **errp) +{ + IPMIBmcExtern *ibe = IPMI_BMC_EXTERN(dev); + + if (!qemu_chr_fe_backend_connected(&ibe->chr)) { + error_setg(errp, "IPMI external bmc requires chardev attribute"); + return; + } + + qemu_chr_fe_set_handlers(&ibe->chr, can_receive, receive, + chr_event, NULL, ibe, NULL, true); + + vmstate_register(NULL, 0, &vmstate_ipmi_bmc_extern, ibe); +} + static void ipmi_bmc_extern_init(Object *obj) { IPMIBmcExtern *ibe = IPMI_BMC_EXTERN(obj); ibe->extern_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, extern_timeout, ibe); - vmstate_register(NULL, 0, &vmstate_ipmi_bmc_extern, ibe); } static void ipmi_bmc_extern_finalize(Object *obj) diff --git a/hw/ipmi/isa_ipmi_bt.c b/hw/ipmi/isa_ipmi_bt.c index a83e7243d6..aec064d3cd 100644 --- a/hw/ipmi/isa_ipmi_bt.c +++ b/hw/ipmi/isa_ipmi_bt.c @@ -68,6 +68,21 @@ static void isa_ipmi_bt_lower_irq(IPMIBT *ib) qemu_irq_lower(iib->irq); } +static const VMStateDescription vmstate_ISAIPMIBTDevice = { + .name = TYPE_IPMI_INTERFACE_PREFIX "isa-bt", + .version_id = 2, + .minimum_version_id = 2, + /* + * Version 1 had messed up the array transfer, it's not even usable + * because it used VMSTATE_VBUFFER_UINT32, but it did not transfer + * the buffer length, so random things would happen. + */ + .fields = (VMStateField[]) { + VMSTATE_STRUCT(bt, ISAIPMIBTDevice, 1, vmstate_IPMIBT, IPMIBT), + VMSTATE_END_OF_LIST() + } +}; + static void isa_ipmi_bt_realize(DeviceState *dev, Error **errp) { Error *err = NULL; @@ -102,30 +117,15 @@ static void isa_ipmi_bt_realize(DeviceState *dev, Error **errp) qdev_set_legacy_instance_id(dev, iib->bt.io_base, iib->bt.io_length); isa_register_ioport(isadev, &iib->bt.io, iib->bt.io_base); -} -static const VMStateDescription vmstate_ISAIPMIBTDevice = { - .name = TYPE_IPMI_INTERFACE_PREFIX "isa-bt", - .version_id = 2, - .minimum_version_id = 2, - /* - * Version 1 had messed up the array transfer, it's not even usable - * because it used VMSTATE_VBUFFER_UINT32, but it did not transfer - * the buffer length, so random things would happen. - */ - .fields = (VMStateField[]) { - VMSTATE_STRUCT(bt, ISAIPMIBTDevice, 1, vmstate_IPMIBT, IPMIBT), - VMSTATE_END_OF_LIST() - } -}; + vmstate_register(NULL, 0, &vmstate_ISAIPMIBTDevice, dev); +} static void isa_ipmi_bt_init(Object *obj) { ISAIPMIBTDevice *iib = ISA_IPMI_BT(obj); ipmi_bmc_find_and_link(obj, (Object **) &iib->bt.bmc); - - vmstate_register(NULL, 0, &vmstate_ISAIPMIBTDevice, iib); } static void *isa_ipmi_bt_get_backend_data(IPMIInterface *ii) diff --git a/hw/ipmi/isa_ipmi_kcs.c b/hw/ipmi/isa_ipmi_kcs.c index b2ed70b9da..b5dcb64616 100644 --- a/hw/ipmi/isa_ipmi_kcs.c +++ b/hw/ipmi/isa_ipmi_kcs.c @@ -67,6 +67,24 @@ static void isa_ipmi_kcs_lower_irq(IPMIKCS *ik) qemu_irq_lower(iik->irq); } +static bool vmstate_kcs_before_version2(void *opaque, int version) +{ + return version <= 1; +} + +static const VMStateDescription vmstate_ISAIPMIKCSDevice = { + .name = TYPE_IPMI_INTERFACE, + .version_id = 2, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VSTRUCT_TEST(kcs, ISAIPMIKCSDevice, vmstate_kcs_before_version2, + 0, vmstate_IPMIKCS, IPMIKCS, 1), + VMSTATE_VSTRUCT_V(kcs, ISAIPMIKCSDevice, 2, vmstate_IPMIKCS, + IPMIKCS, 2), + VMSTATE_END_OF_LIST() + } +}; + static void ipmi_isa_realize(DeviceState *dev, Error **errp) { Error *err = NULL; @@ -101,31 +119,6 @@ static void ipmi_isa_realize(DeviceState *dev, Error **errp) qdev_set_legacy_instance_id(dev, iik->kcs.io_base, iik->kcs.io_length); isa_register_ioport(isadev, &iik->kcs.io, iik->kcs.io_base); -} - -static bool vmstate_kcs_before_version2(void *opaque, int version) -{ - return version <= 1; -} - -static const VMStateDescription vmstate_ISAIPMIKCSDevice = { - .name = TYPE_IPMI_INTERFACE, - .version_id = 2, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_VSTRUCT_TEST(kcs, ISAIPMIKCSDevice, vmstate_kcs_before_version2, - 0, vmstate_IPMIKCS, IPMIKCS, 1), - VMSTATE_VSTRUCT_V(kcs, ISAIPMIKCSDevice, 2, vmstate_IPMIKCS, - IPMIKCS, 2), - VMSTATE_END_OF_LIST() - } -}; - -static void isa_ipmi_kcs_init(Object *obj) -{ - ISAIPMIKCSDevice *iik = ISA_IPMI_KCS(obj); - - ipmi_bmc_find_and_link(obj, (Object **) &iik->kcs.bmc); /* * Version 1 had an incorrect name, it clashed with the BT @@ -135,6 +128,13 @@ static void isa_ipmi_kcs_init(Object *obj) vmstate_register(NULL, 0, &vmstate_ISAIPMIKCSDevice, iik); } +static void isa_ipmi_kcs_init(Object *obj) +{ + ISAIPMIKCSDevice *iik = ISA_IPMI_KCS(obj); + + ipmi_bmc_find_and_link(obj, (Object **) &iik->kcs.bmc); +} + static void *isa_ipmi_kcs_get_backend_data(IPMIInterface *ii) { ISAIPMIKCSDevice *iik = ISA_IPMI_KCS(ii); From 2fb40d1b949a98c1742a9277e716da9bc502ffe4 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 20 Oct 2023 17:05:52 +0200 Subject: [PATCH 02/40] hw/s390x/s390-skeys: Don't call register_savevm_live() during instance_init() Since the instance_init() function immediately tries to set the property to "true", the s390_skeys_set_migration_enabled() tries to register a savevm handler during instance_init(). However, instance_init() functions can be called multiple times, e.g. for introspection of devices. That means multiple instances of devices can be created during runtime (which is fine as long as they all don't get realized, too), so the "Prevent double registration of savevm handler" check in the s390_skeys_set_migration_enabled() function does not work at all as expected (since there could be more than one instance). Thus we must not call register_savevm_live() from an instance_init() function at all. Move this to the realize() function instead. This way we can also get rid of the property getter and setter functions completely, simplifying the code along the way quite a bit. Acked-by: David Hildenbrand Reviewed-by: Eric Farman Acked-by: Juan Quintela Signed-off-by: Thomas Huth Signed-off-by: Juan Quintela Message-ID: <20231020150554.664422-2-thuth@redhat.com> --- hw/s390x/s390-skeys.c | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c index 5024faf411..8f5159d85d 100644 --- a/hw/s390x/s390-skeys.c +++ b/hw/s390x/s390-skeys.c @@ -12,6 +12,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "hw/boards.h" +#include "hw/qdev-properties.h" #include "hw/s390x/storage-keys.h" #include "qapi/error.h" #include "qapi/qapi-commands-misc-target.h" @@ -432,58 +433,39 @@ static int s390_storage_keys_load(QEMUFile *f, void *opaque, int version_id) return ret; } -static inline bool s390_skeys_get_migration_enabled(Object *obj, Error **errp) -{ - S390SKeysState *ss = S390_SKEYS(obj); - - return ss->migration_enabled; -} - static SaveVMHandlers savevm_s390_storage_keys = { .save_state = s390_storage_keys_save, .load_state = s390_storage_keys_load, }; -static inline void s390_skeys_set_migration_enabled(Object *obj, bool value, - Error **errp) +static void s390_skeys_realize(DeviceState *dev, Error **errp) { - S390SKeysState *ss = S390_SKEYS(obj); - - /* Prevent double registration of savevm handler */ - if (ss->migration_enabled == value) { - return; - } - - ss->migration_enabled = value; + S390SKeysState *ss = S390_SKEYS(dev); if (ss->migration_enabled) { register_savevm_live(TYPE_S390_SKEYS, 0, 1, &savevm_s390_storage_keys, ss); - } else { - unregister_savevm(VMSTATE_IF(ss), TYPE_S390_SKEYS, ss); } } -static void s390_skeys_instance_init(Object *obj) -{ - object_property_add_bool(obj, "migration-enabled", - s390_skeys_get_migration_enabled, - s390_skeys_set_migration_enabled); - object_property_set_bool(obj, "migration-enabled", true, NULL); -} +static Property s390_skeys_props[] = { + DEFINE_PROP_BOOL("migration-enabled", S390SKeysState, migration_enabled, true), + DEFINE_PROP_END_OF_LIST(), +}; static void s390_skeys_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); dc->hotpluggable = false; + dc->realize = s390_skeys_realize; + device_class_set_props(dc, s390_skeys_props); set_bit(DEVICE_CATEGORY_MISC, dc->categories); } static const TypeInfo s390_skeys_info = { .name = TYPE_S390_SKEYS, .parent = TYPE_DEVICE, - .instance_init = s390_skeys_instance_init, .instance_size = sizeof(S390SKeysState), .class_init = s390_skeys_class_init, .class_size = sizeof(S390SKeysClass), From bc1bf2ae2f76d451126fb6a15f00d8a2cb9c4e76 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 20 Oct 2023 17:05:53 +0200 Subject: [PATCH 03/40] hw/s390x/s390-stattrib: Simplify handling of the "migration-enabled" property There's no need for dedicated handlers here if they don't do anything special. Acked-by: David Hildenbrand Reviewed-by: Eric Farman Acked-by: Juan Quintela Signed-off-by: Thomas Huth Signed-off-by: Juan Quintela Message-ID: <20231020150554.664422-3-thuth@redhat.com> --- hw/s390x/s390-stattrib.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c index 220e845d12..4f1ab57437 100644 --- a/hw/s390x/s390-stattrib.c +++ b/hw/s390x/s390-stattrib.c @@ -13,6 +13,7 @@ #include "qemu/units.h" #include "migration/qemu-file.h" #include "migration/register.h" +#include "hw/qdev-properties.h" #include "hw/s390x/storage-attributes.h" #include "qemu/error-report.h" #include "exec/ram_addr.h" @@ -340,6 +341,11 @@ static void s390_stattrib_realize(DeviceState *dev, Error **errp) } } +static Property s390_stattrib_props[] = { + DEFINE_PROP_BOOL("migration-enabled", S390StAttribState, migration_enabled, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void s390_stattrib_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); @@ -347,22 +353,7 @@ static void s390_stattrib_class_init(ObjectClass *oc, void *data) dc->hotpluggable = false; set_bit(DEVICE_CATEGORY_MISC, dc->categories); dc->realize = s390_stattrib_realize; -} - -static inline bool s390_stattrib_get_migration_enabled(Object *obj, - Error **errp) -{ - S390StAttribState *s = S390_STATTRIB(obj); - - return s->migration_enabled; -} - -static inline void s390_stattrib_set_migration_enabled(Object *obj, bool value, - Error **errp) -{ - S390StAttribState *s = S390_STATTRIB(obj); - - s->migration_enabled = value; + device_class_set_props(dc, s390_stattrib_props); } static SaveVMHandlers savevm_s390_stattrib_handlers = { @@ -383,10 +374,6 @@ static void s390_stattrib_instance_init(Object *obj) register_savevm_live(TYPE_S390_STATTRIB, 0, 0, &savevm_s390_stattrib_handlers, sas); - object_property_add_bool(obj, "migration-enabled", - s390_stattrib_get_migration_enabled, - s390_stattrib_set_migration_enabled); - object_property_set_bool(obj, "migration-enabled", true, NULL); sas->migration_cur_gfn = 0; } From 832685707fbe52b2525974a100a240338998e544 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 20 Oct 2023 17:05:54 +0200 Subject: [PATCH 04/40] hw/s390x/s390-stattrib: Don't call register_savevm_live() during instance_init() We must not call register_savevm_live() from an instance_init() function (since this could be called multiple times during device introspection). Move this to the realize() function instead. Acked-by: David Hildenbrand Reviewed-by: Eric Farman Signed-off-by: Juan Quintela Signed-off-by: Thomas Huth Signed-off-by: Juan Quintela Message-ID: <20231020150554.664422-4-thuth@redhat.com> --- hw/s390x/s390-stattrib.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c index 4f1ab57437..c483b62a9b 100644 --- a/hw/s390x/s390-stattrib.c +++ b/hw/s390x/s390-stattrib.c @@ -331,6 +331,17 @@ static const TypeInfo qemu_s390_stattrib_info = { /* Generic abstract object: */ +static SaveVMHandlers savevm_s390_stattrib_handlers = { + .save_setup = cmma_save_setup, + .save_live_iterate = cmma_save_iterate, + .save_live_complete_precopy = cmma_save_complete, + .state_pending_exact = cmma_state_pending, + .state_pending_estimate = cmma_state_pending, + .save_cleanup = cmma_save_cleanup, + .load_state = cmma_load, + .is_active = cmma_active, +}; + static void s390_stattrib_realize(DeviceState *dev, Error **errp) { bool ambiguous = false; @@ -338,7 +349,11 @@ static void s390_stattrib_realize(DeviceState *dev, Error **errp) object_resolve_path_type("", TYPE_S390_STATTRIB, &ambiguous); if (ambiguous) { error_setg(errp, "storage_attributes device already exists"); + return; } + + register_savevm_live(TYPE_S390_STATTRIB, 0, 0, + &savevm_s390_stattrib_handlers, dev); } static Property s390_stattrib_props[] = { @@ -356,24 +371,10 @@ static void s390_stattrib_class_init(ObjectClass *oc, void *data) device_class_set_props(dc, s390_stattrib_props); } -static SaveVMHandlers savevm_s390_stattrib_handlers = { - .save_setup = cmma_save_setup, - .save_live_iterate = cmma_save_iterate, - .save_live_complete_precopy = cmma_save_complete, - .state_pending_exact = cmma_state_pending, - .state_pending_estimate = cmma_state_pending, - .save_cleanup = cmma_save_cleanup, - .load_state = cmma_load, - .is_active = cmma_active, -}; - static void s390_stattrib_instance_init(Object *obj) { S390StAttribState *sas = S390_STATTRIB(obj); - register_savevm_live(TYPE_S390_STATTRIB, 0, 0, - &savevm_s390_stattrib_handlers, sas); - sas->migration_cur_gfn = 0; } From 71daf640d91d49a784ead059c9021070a2107008 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 20 Oct 2023 11:07:19 +0200 Subject: [PATCH 05/40] migration: Create vmstate_register_any() We have lots of cases where we are using an instance_id==0 when we should be using VMSTATE_INSTANCE_ID_ANY (-1). Basically everything that can have more than one needs to have a proper instance_id or -1 and the system will take one for it. vmstate_register_any(): We register with -1. Reviewed-by: Stefan Berger Signed-off-by: Juan Quintela Message-ID: <20231020090731.28701-2-quintela@redhat.com> --- include/migration/vmstate.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 1af181877c..1ea97ccf2d 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -1230,6 +1230,23 @@ static inline int vmstate_register(VMStateIf *obj, int instance_id, opaque, -1, 0, NULL); } +/** + * vmstate_register_any() - legacy function to register state + * serialisation description and let the function choose the id + * + * New code shouldn't be using this function as QOM-ified devices have + * dc->vmsd to store the serialisation description. + * + * Returns: 0 on success, -1 on failure + */ +static inline int vmstate_register_any(VMStateIf *obj, + const VMStateDescription *vmsd, + void *opaque) +{ + return vmstate_register_with_alias_id(obj, VMSTATE_INSTANCE_ID_ANY, vmsd, + opaque, -1, 0, NULL); +} + void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd, void *opaque); From 99b16e8ee4789432cc543fde45902796385713c0 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 20 Oct 2023 11:07:20 +0200 Subject: [PATCH 06/40] migration: Use vmstate_register_any() This are the easiest cases, where we were already using VMSTATE_INSTANCE_ID_ANY. Reviewed-by: Stefan Berger Signed-off-by: Juan Quintela Message-ID: <20231020090731.28701-3-quintela@redhat.com> --- backends/dbus-vmstate.c | 3 +-- backends/tpm/tpm_emulator.c | 3 +-- hw/i2c/core.c | 2 +- hw/input/adb.c | 2 +- hw/input/ads7846.c | 2 +- hw/input/stellaris_input.c | 3 +-- hw/net/eepro100.c | 3 +-- hw/pci/pci.c | 2 +- hw/ppc/spapr_nvdimm.c | 3 +-- hw/timer/arm_timer.c | 2 +- hw/virtio/virtio-mem.c | 4 ++-- 11 files changed, 12 insertions(+), 17 deletions(-) diff --git a/backends/dbus-vmstate.c b/backends/dbus-vmstate.c index 57369ec0f2..a9d8cb0acd 100644 --- a/backends/dbus-vmstate.c +++ b/backends/dbus-vmstate.c @@ -426,8 +426,7 @@ dbus_vmstate_complete(UserCreatable *uc, Error **errp) return; } - if (vmstate_register(VMSTATE_IF(self), VMSTATE_INSTANCE_ID_ANY, - &dbus_vmstate, self) < 0) { + if (vmstate_register_any(VMSTATE_IF(self), &dbus_vmstate, self) < 0) { error_setg(errp, "Failed to register vmstate"); } } diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c index bf1a90f5d7..f7f1b4ad7a 100644 --- a/backends/tpm/tpm_emulator.c +++ b/backends/tpm/tpm_emulator.c @@ -975,8 +975,7 @@ static void tpm_emulator_inst_init(Object *obj) qemu_add_vm_change_state_handler(tpm_emulator_vm_state_change, tpm_emu); - vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, - &vmstate_tpm_emulator, obj); + vmstate_register_any(NULL, &vmstate_tpm_emulator, obj); } /* diff --git a/hw/i2c/core.c b/hw/i2c/core.c index bed594fe59..879a1d45cb 100644 --- a/hw/i2c/core.c +++ b/hw/i2c/core.c @@ -64,7 +64,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) bus = I2C_BUS(qbus_new(TYPE_I2C_BUS, parent, name)); QLIST_INIT(&bus->current_devs); QSIMPLEQ_INIT(&bus->pending_masters); - vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); + vmstate_register_any(NULL, &vmstate_i2c_bus, bus); return bus; } diff --git a/hw/input/adb.c b/hw/input/adb.c index 214ae6f42b..8aed0da2cd 100644 --- a/hw/input/adb.c +++ b/hw/input/adb.c @@ -247,7 +247,7 @@ static void adb_bus_realize(BusState *qbus, Error **errp) adb_bus->autopoll_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, adb_autopoll, adb_bus); - vmstate_register(NULL, -1, &vmstate_adb_bus, adb_bus); + vmstate_register_any(NULL, &vmstate_adb_bus, adb_bus); } static void adb_bus_unrealize(BusState *qbus) diff --git a/hw/input/ads7846.c b/hw/input/ads7846.c index dc0998ac79..91116c6bdb 100644 --- a/hw/input/ads7846.c +++ b/hw/input/ads7846.c @@ -158,7 +158,7 @@ static void ads7846_realize(SSIPeripheral *d, Error **errp) ads7846_int_update(s); - vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); + vmstate_register_any(NULL, &vmstate_ads7846, s); } static void ads7846_class_init(ObjectClass *klass, void *data) diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c index e6ee5e11f1..a58721c8cd 100644 --- a/hw/input/stellaris_input.c +++ b/hw/input/stellaris_input.c @@ -88,6 +88,5 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) } s->num_buttons = n; qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); - vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, - &vmstate_stellaris_gamepad, s); + vmstate_register_any(NULL, &vmstate_stellaris_gamepad, s); } diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c index dc07984ae9..94ce9e18ff 100644 --- a/hw/net/eepro100.c +++ b/hw/net/eepro100.c @@ -1883,8 +1883,7 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); s->vmstate->name = qemu_get_queue(s->nic)->model; - vmstate_register(VMSTATE_IF(&pci_dev->qdev), VMSTATE_INSTANCE_ID_ANY, - s->vmstate, s); + vmstate_register_any(VMSTATE_IF(&pci_dev->qdev), s->vmstate, s); } static void eepro100_instance_init(Object *obj) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 7d09e1a39d..885c04b6f5 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -147,7 +147,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) bus->machine_done.notify = pcibus_machine_done; qemu_add_machine_init_done_notifier(&bus->machine_done); - vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); + vmstate_register_any(NULL, &vmstate_pcibus, bus); } static void pcie_bus_realize(BusState *qbus, Error **errp) diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c index b2f009c816..ad7afe7544 100644 --- a/hw/ppc/spapr_nvdimm.c +++ b/hw/ppc/spapr_nvdimm.c @@ -876,8 +876,7 @@ static void spapr_nvdimm_realize(NVDIMMDevice *dimm, Error **errp) s_nvdimm->hcall_flush_required = true; } - vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, - &vmstate_spapr_nvdimm_states, dimm); + vmstate_register_any(NULL, &vmstate_spapr_nvdimm_states, dimm); } static void spapr_nvdimm_unrealize(NVDIMMDevice *dimm) diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c index 69c8863472..9afe8da831 100644 --- a/hw/timer/arm_timer.c +++ b/hw/timer/arm_timer.c @@ -181,7 +181,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) s->control = TIMER_CTRL_IE; s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_LEGACY); - vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); + vmstate_register_any(NULL, &vmstate_arm_timer, s); return s; } diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 9dc3c61b5a..a5ea3be414 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -1119,8 +1119,8 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) host_memory_backend_set_mapped(vmem->memdev, true); vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem)); if (vmem->early_migration) { - vmstate_register(VMSTATE_IF(vmem), VMSTATE_INSTANCE_ID_ANY, - &vmstate_virtio_mem_device_early, vmem); + vmstate_register_any(VMSTATE_IF(vmem), + &vmstate_virtio_mem_device_early, vmem); } qemu_register_reset(virtio_mem_system_reset, vmem); From 1f52c7a80bf19fef5228abe85cdb845088e07274 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 20 Oct 2023 11:07:21 +0200 Subject: [PATCH 07/40] migration: Use vmstate_register_any() for isa-ide Otherwise qom-test fails. ok 4 /i386/qom/x-remote qemu-system-i386: savevm_state_handler_insert: Detected duplicate SaveStateEntry: id=isa-ide, instance_id=0x0 Broken pipe ../../../../../mnt/code/qemu/full/tests/qtest/libqtest.c:195: kill_qemu() tried to terminate QEMU process but encountered exit status 1 (expected 0) Aborted (core dumped) $ Reviewed-by: Stefan Berger Signed-off-by: Juan Quintela Message-ID: <20231020090731.28701-4-quintela@redhat.com> --- hw/ide/isa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ide/isa.c b/hw/ide/isa.c index 95053e026f..ea60c08116 100644 --- a/hw/ide/isa.c +++ b/hw/ide/isa.c @@ -73,7 +73,7 @@ static void isa_ide_realizefn(DeviceState *dev, Error **errp) ide_bus_init(&s->bus, sizeof(s->bus), dev, 0, 2); ide_init_ioport(&s->bus, isadev, s->iobase, s->iobase2); ide_bus_init_output_irq(&s->bus, isa_get_irq(isadev, s->irqnum)); - vmstate_register(VMSTATE_IF(dev), 0, &vmstate_ide_isa, s); + vmstate_register_any(VMSTATE_IF(dev), &vmstate_ide_isa, s); ide_bus_register_restart_cb(&s->bus); } From b23db4cd8218345a7ec75a8ff810896ce3a391b1 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 20 Oct 2023 11:07:23 +0200 Subject: [PATCH 08/40] migration: Use VMSTATE_INSTANCE_ID_ANY for slirp Each user network conection create a new slirp instance. We register more than one slirp instance for number 0. qemu-system-x86_64: -netdev user,id=hs1: savevm_state_handler_insert: Detected duplicate SaveStateEntry: id=slirp, instance_id=0x0 Broken pipe ../../../../../mnt/code/qemu/full/tests/qtest/libqtest.c:195: kill_qemu() tried to terminate QEMU process but encountered exit status 1 (expected 0) Aborted (core dumped) Reviewed-by: Stefan Berger Signed-off-by: Juan Quintela Message-ID: <20231020090731.28701-6-quintela@redhat.com> --- net/slirp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/slirp.c b/net/slirp.c index c33b3e02e7..25b49c4526 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -46,6 +46,7 @@ #include "qapi/qmp/qdict.h" #include "util.h" #include "migration/register.h" +#include "migration/vmstate.h" #include "migration/qemu-file-types.h" static int get_str_sep(char *buf, int buf_size, const char **pp, int sep) @@ -659,8 +660,8 @@ static int net_slirp_init(NetClientState *peer, const char *model, * specific version? */ g_assert(slirp_state_version() == 4); - register_savevm_live("slirp", 0, slirp_state_version(), - &savevm_slirp_state, s->slirp); + register_savevm_live("slirp", VMSTATE_INSTANCE_ID_ANY, + slirp_state_version(), &savevm_slirp_state, s->slirp); s->poll_notifier.notify = net_slirp_poll_notify; main_loop_poll_add_notifier(&s->poll_notifier); From 485fb95546e1cc94e77b93ff785bdbfa2c1d3ff1 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 20 Oct 2023 11:07:25 +0200 Subject: [PATCH 09/40] migration: Hack to maintain backwards compatibility for ppc Current code does: - register pre_2_10_vmstate_dummy_icp with "icp/server" and instance dependinfg on cpu number - for newer machines, it register vmstate_icp with "icp/server" name and instance 0 - now it unregisters "icp/server" for the 1st instance. This is wrong at many levels: - we shouldn't have two VMSTATEDescriptions with the same name - In case this is the only solution that we can came with, it needs to be: * register pre_2_10_vmstate_dummy_icp * unregister pre_2_10_vmstate_dummy_icp * register real vmstate_icp Created vmstate_replace_hack_for_ppc() with warnings left and right that it is a hack. CC: Cedric Le Goater CC: Daniel Henrique Barboza CC: David Gibson CC: Greg Kurz Reviewed-by: Nicholas Piggin Signed-off-by: Juan Quintela Message-ID: <20231020090731.28701-8-quintela@redhat.com> --- hw/intc/xics.c | 18 ++++++++++++++++-- hw/ppc/spapr.c | 25 +++++++++++++++++++++++-- include/migration/vmstate.h | 11 +++++++++++ migration/savevm.c | 18 ++++++++++++++++++ 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/hw/intc/xics.c b/hw/intc/xics.c index c7f8abd71e..c77e986136 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -335,8 +335,22 @@ static void icp_realize(DeviceState *dev, Error **errp) return; } } - - vmstate_register(NULL, icp->cs->cpu_index, &vmstate_icp_server, icp); + /* + * The way that pre_2_10_icp is handling is really, really hacky. + * We used to have here this call: + * + * vmstate_register(NULL, icp->cs->cpu_index, &vmstate_icp_server, icp); + * + * But we were doing: + * pre_2_10_vmstate_register_dummy_icp() + * this vmstate_register() + * pre_2_10_vmstate_unregister_dummy_icp() + * + * So for a short amount of time we had to vmstate entries with + * the same name. This fixes it. + */ + vmstate_replace_hack_for_ppc(NULL, icp->cs->cpu_index, + &vmstate_icp_server, icp); } static void icp_unrealize(DeviceState *dev) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b25093be28..df09aa9d6a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -143,6 +143,11 @@ static bool pre_2_10_vmstate_dummy_icp_needed(void *opaque) } static const VMStateDescription pre_2_10_vmstate_dummy_icp = { + /* + * Hack ahead. We can't have two devices with the same name and + * instance id. So I rename this to pass make check. + * Real help from people who knows the hardware is needed. + */ .name = "icp/server", .version_id = 1, .minimum_version_id = 1, @@ -155,16 +160,32 @@ static const VMStateDescription pre_2_10_vmstate_dummy_icp = { }, }; +/* + * See comment in hw/intc/xics.c:icp_realize() + * + * You have to remove vmstate_replace_hack_for_ppc() when you remove + * the machine types that need the following function. + */ static void pre_2_10_vmstate_register_dummy_icp(int i) { vmstate_register(NULL, i, &pre_2_10_vmstate_dummy_icp, (void *)(uintptr_t) i); } +/* + * See comment in hw/intc/xics.c:icp_realize() + * + * You have to remove vmstate_replace_hack_for_ppc() when you remove + * the machine types that need the following function. + */ static void pre_2_10_vmstate_unregister_dummy_icp(int i) { - vmstate_unregister(NULL, &pre_2_10_vmstate_dummy_icp, - (void *)(uintptr_t) i); + /* + * This used to be: + * + * vmstate_unregister(NULL, &pre_2_10_vmstate_dummy_icp, + * (void *)(uintptr_t) i); + */ } int spapr_max_server_number(SpaprMachineState *spapr) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 1ea97ccf2d..9821918631 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -1230,6 +1230,17 @@ static inline int vmstate_register(VMStateIf *obj, int instance_id, opaque, -1, 0, NULL); } +/** + * vmstate_replace_hack_for_ppc() - ppc used to abuse vmstate_register + * + * Don't even think about using this function in new code. + * + * Returns: 0 on success, -1 on failure + */ +int vmstate_replace_hack_for_ppc(VMStateIf *obj, int instance_id, + const VMStateDescription *vmsd, + void *opaque); + /** * vmstate_register_any() - legacy function to register state * serialisation description and let the function choose the id diff --git a/migration/savevm.c b/migration/savevm.c index c7835e9c73..c7596c3e9b 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -846,6 +846,24 @@ static void vmstate_check(const VMStateDescription *vmsd) } } +/* + * See comment in hw/intc/xics.c:icp_realize() + * + * This function can be removed when + * pre_2_10_vmstate_register_dummy_icp() is removed. + */ +int vmstate_replace_hack_for_ppc(VMStateIf *obj, int instance_id, + const VMStateDescription *vmsd, + void *opaque) +{ + SaveStateEntry *se = find_se(vmsd->name, instance_id); + + if (se) { + savevm_state_handler_remove(se); + } + return vmstate_register(obj, instance_id, vmsd, opaque); +} + int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id, const VMStateDescription *vmsd, void *opaque, int alias_id, From caa91b3c44cdb2d2921e25eda554d38c527e6c47 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 20 Oct 2023 11:07:27 +0200 Subject: [PATCH 10/40] migration: Check in savevm_state_handler_insert for dups Before finally register one SaveStateEntry, we detect for duplicated entries. This could be helpful to notify us asap instead of get silent migration failures which could be hard to diagnose. For example, this patch will generate a message like this (if without previous fixes on x2apic) as long as we wants to boot a VM instance with "-smp 200,maxcpus=288,sockets=2,cores=72,threads=2" and QEMU will bail out even before VM starts: savevm_state_handler_insert: Detected duplicate SaveStateEntry: id=apic, instance_id=0x0 Suggested-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela Message-ID: <20231020090731.28701-10-quintela@redhat.com> --- migration/savevm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/migration/savevm.c b/migration/savevm.c index c7596c3e9b..2095ddd6f8 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -237,6 +237,8 @@ static SaveState savevm_state = { .global_section_id = 0, }; +static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id); + static bool should_validate_capability(int capability) { assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX); @@ -716,6 +718,18 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) assert(priority <= MIG_PRI_MAX); + /* + * This should never happen otherwise migration will probably fail + * silently somewhere because we can be wrongly applying one + * object properties upon another one. Bail out ASAP. + */ + if (find_se(nse->idstr, nse->instance_id)) { + error_report("%s: Detected duplicate SaveStateEntry: " + "id=%s, instance_id=0x%"PRIx32, __func__, + nse->idstr, nse->instance_id); + exit(EXIT_FAILURE); + } + for (i = priority - 1; i >= 0; i--) { se = savevm_state.handler_pri_head[i]; if (se != NULL) { From 5b146be3e78d0290670cfc60179218e539a7774f Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 20 Oct 2023 11:07:28 +0200 Subject: [PATCH 11/40] migration: Improve example and documentation of vmstate_register() Reviewed-by: Stefan Berger Signed-off-by: Juan Quintela Message-ID: <20231020090731.28701-11-quintela@redhat.com> --- docs/devel/migration.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index be913630c3..240eb16d90 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -167,13 +167,17 @@ An example (from hw/input/pckbd.c) } }; -We are declaring the state with name "pckbd". -The ``version_id`` is 3, and the fields are 4 uint8_t in a KBDState structure. -We registered this with: +We are declaring the state with name "pckbd". The ``version_id`` is +3, and there are 4 uint8_t fields in the KBDState structure. We +registered this ``VMSTATEDescription`` with one of the following +functions. The first one will generate a device ``instance_id`` +different for each registration. Use the second one if you already +have an id that is different for each instance of the device: .. code:: c - vmstate_register(NULL, 0, &vmstate_kbd, s); + vmstate_register_any(NULL, &vmstate_kbd, s); + vmstate_register(NULL, instance_id, &vmstate_kbd, s); For devices that are ``qdev`` based, we can register the device in the class init function: From a9500913ab96f59eb8f7e5fc79f80d5d66842102 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 20 Oct 2023 11:07:29 +0200 Subject: [PATCH 12/40] migration: Use vmstate_register_any() for audio We can have more than one audio backend. void audio_init_audiodevs(void) { AudiodevListEntry *e; QSIMPLEQ_FOREACH(e, &audiodevs, next) { audio_init(e->dev, &error_fatal); } } Reviewed-by: Stefan Berger Signed-off-by: Juan Quintela Message-ID: <20231020090731.28701-12-quintela@redhat.com> --- audio/audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audio/audio.c b/audio/audio.c index e9815d6812..f91e05b72c 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -1781,7 +1781,7 @@ static AudioState *audio_init(Audiodev *dev, Error **errp) QTAILQ_INSERT_TAIL(&audio_states, s, list); QLIST_INIT (&s->card_head); - vmstate_register (NULL, 0, &vmstate_audio, s); + vmstate_register_any(NULL, &vmstate_audio, s); return s; out: From 7769fb81efeda6546f4c0fe3324bde91443aabec Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 20 Oct 2023 11:07:30 +0200 Subject: [PATCH 13/40] migration: Use vmstate_register_any() for eeprom93xx We can have more than one eeprom93xx. For instance: e100_nic_realize() -> eeprom93xx_new() Reviewed-by: Stefan Berger Signed-off-by: Juan Quintela Message-ID: <20231020090731.28701-13-quintela@redhat.com> --- hw/nvram/eeprom93xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/nvram/eeprom93xx.c b/hw/nvram/eeprom93xx.c index 1081e2cc0d..57d63638d7 100644 --- a/hw/nvram/eeprom93xx.c +++ b/hw/nvram/eeprom93xx.c @@ -321,7 +321,7 @@ eeprom_t *eeprom93xx_new(DeviceState *dev, uint16_t nwords) /* Output DO is tristate, read results in 1. */ eeprom->eedo = 1; logout("eeprom = 0x%p, nwords = %u\n", eeprom, nwords); - vmstate_register(VMSTATE_IF(dev), 0, &vmstate_eeprom, eeprom); + vmstate_register_any(VMSTATE_IF(dev), &vmstate_eeprom, eeprom); return eeprom; } From c64a59552e44fbae22a565eec3632df59e79d57c Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 20 Oct 2023 11:07:31 +0200 Subject: [PATCH 14/40] migration: Use vmstate_register_any() for vmware_vga I have no idea if we can have more than one vmware_vga device, so play it safe. Reviewed-by: Stefan Berger Reviewed-by: Thomas Huth Signed-off-by: Juan Quintela Message-ID: <20231020090731.28701-14-quintela@redhat.com> --- hw/display/vmware_vga.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c index 09591fbd39..7490d43881 100644 --- a/hw/display/vmware_vga.c +++ b/hw/display/vmware_vga.c @@ -1264,7 +1264,7 @@ static void vmsvga_init(DeviceState *dev, struct vmsvga_state_s *s, vga_common_init(&s->vga, OBJECT(dev), &error_fatal); vga_init(&s->vga, OBJECT(dev), address_space, io, true); - vmstate_register(NULL, 0, &vmstate_vga_common, &s->vga); + vmstate_register_any(NULL, &vmstate_vga_common, &s->vga); s->new_depth = 32; } From 62f5da7dd10a594fb30cebb5569dc738456f7131 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 30 Oct 2023 12:33:42 -0400 Subject: [PATCH 15/40] migration: Set downtime_start even for postcopy Postcopy calculates its downtime separately. It always sets MigrationState.downtime properly, but not MigrationState.downtime_start. Make postcopy do the same as other modes on properly recording the timestamp when the VM is going to be stopped. Drop the temporary variable in postcopy_start() along the way. Signed-off-by: Peter Xu Reviewed-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231030163346.765724-2-peterx@redhat.com> --- migration/migration.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 6abcbefd9c..6dcdc5be2b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2126,7 +2126,6 @@ static int postcopy_start(MigrationState *ms, Error **errp) int ret; QIOChannelBuffer *bioc; QEMUFile *fb; - int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); uint64_t bandwidth = migrate_max_postcopy_bandwidth(); bool restart_block = false; int cur_state = MIGRATION_STATUS_ACTIVE; @@ -2148,6 +2147,8 @@ static int postcopy_start(MigrationState *ms, Error **errp) qemu_mutex_lock_iothread(); trace_postcopy_start_set_run(); + ms->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); global_state_store(); ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); @@ -2250,7 +2251,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) ms->postcopy_after_devices = true; migration_call_notifiers(ms); - ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; + ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - ms->downtime_start; qemu_mutex_unlock_iothread(); From e22ffad03a32d3fc4717561f8af5ef7d9657728d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 30 Oct 2023 12:33:43 -0400 Subject: [PATCH 16/40] migration: Add migration_downtime_start|end() helpers Unify the three users on recording downtimes with the same pair of helpers. Signed-off-by: Peter Xu Reviewed-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231030163346.765724-3-peterx@redhat.com> --- migration/migration.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 6dcdc5be2b..8aac0c753e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -101,6 +101,24 @@ static int migration_maybe_pause(MigrationState *s, static void migrate_fd_cancel(MigrationState *s); static int close_return_path_on_source(MigrationState *s); +static void migration_downtime_start(MigrationState *s) +{ + s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +} + +static void migration_downtime_end(MigrationState *s) +{ + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + + /* + * If downtime already set, should mean that postcopy already set it, + * then that should be the real downtime already. + */ + if (!s->downtime) { + s->downtime = now - s->downtime_start; + } +} + static bool migration_needs_multiple_sockets(void) { return migrate_multifd() || migrate_postcopy_preempt(); @@ -2147,7 +2165,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) qemu_mutex_lock_iothread(); trace_postcopy_start_set_run(); - ms->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + migration_downtime_start(ms); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); global_state_store(); @@ -2251,7 +2269,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) ms->postcopy_after_devices = true; migration_call_notifiers(ms); - ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - ms->downtime_start; + migration_downtime_end(ms); qemu_mutex_unlock_iothread(); @@ -2347,7 +2365,7 @@ static int migration_completion_precopy(MigrationState *s, int ret; qemu_mutex_lock_iothread(); - s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + migration_downtime_start(s); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); s->vm_old_state = runstate_get(); @@ -2704,15 +2722,8 @@ static void migration_calculate_complete(MigrationState *s) int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); int64_t transfer_time; + migration_downtime_end(s); s->total_time = end_time - s->start_time; - if (!s->downtime) { - /* - * It's still not set, so we are precopy migration. For - * postcopy, downtime is calculated during postcopy_start(). - */ - s->downtime = end_time - s->downtime_start; - } - transfer_time = s->total_time - s->setup_time; if (transfer_time) { s->mbps = ((double) bytes * 8.0) / transfer_time / 1000; @@ -3131,7 +3142,7 @@ static void bg_migration_vm_start_bh(void *opaque) s->vm_start_bh = NULL; vm_start(); - s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start; + migration_downtime_end(s); } /** @@ -3198,7 +3209,7 @@ static void *bg_migration_thread(void *opaque) s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; trace_migration_thread_setup_complete(); - s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + migration_downtime_start(s); qemu_mutex_lock_iothread(); From 3c80f14272057f77c87fb9971e8c4f603a03289a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 30 Oct 2023 12:33:44 -0400 Subject: [PATCH 17/40] migration: Add per vmstate downtime tracepoints We have a bunch of savevm_section* tracepoints, they're good to analyze migration stream, but not always suitable if someone would like to analyze the migration downtime. Two major problems: - savevm_section* tracepoints are dumping all sections, we only care about the sections that contribute to the downtime - They don't have an identifier to show the type of sections, so no way to filter downtime information either easily. We can add type into the tracepoints, but instead of doing so, this patch kept them untouched, instead of adding a bunch of downtime specific tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a full picture of how the downtime is distributed across iterative and non-iterative vmstate save/load. Note that here both save() and load() need to be traced, because both of them may contribute to the downtime. The contribution is not a simple "add them together", though: consider when the src is doing a save() of device1 while the dest can be load()ing for device2, so they can happen concurrently. Tracking both sides make sense because device load() and save() can be imbalanced, one device can save() super fast, but load() super slow, vice versa. We can't figure that out without tracing both. Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231030163346.765724-4-peterx@redhat.com> --- migration/savevm.c | 49 ++++++++++++++++++++++++++++++++++++++---- migration/trace-events | 2 ++ 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index 2095ddd6f8..99ce42b6f3 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1491,6 +1491,7 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f) static int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) { + int64_t start_ts_each, end_ts_each; SaveStateEntry *se; int ret; @@ -1507,6 +1508,8 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) continue; } } + + start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME); trace_savevm_section_start(se->idstr, se->section_id); save_section_header(f, se, QEMU_VM_SECTION_END); @@ -1518,6 +1521,9 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) qemu_file_set_error(f, ret); return -1; } + end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id, + end_ts_each - start_ts_each); } return 0; @@ -1528,6 +1534,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, bool inactivate_disks) { MigrationState *ms = migrate_get_current(); + int64_t start_ts_each, end_ts_each; JSONWriter *vmdesc = ms->vmdesc; int vmdesc_len; SaveStateEntry *se; @@ -1539,11 +1546,17 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, continue; } + start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + ret = vmstate_save(f, se, vmdesc); if (ret) { qemu_file_set_error(f, ret); return ret; } + + end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id, + end_ts_each - start_ts_each); } if (inactivate_disks) { @@ -2537,9 +2550,12 @@ static bool check_section_footer(QEMUFile *f, SaveStateEntry *se) } static int -qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) +qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis, + uint8_t type) { + bool trace_downtime = (type == QEMU_VM_SECTION_FULL); uint32_t instance_id, version_id, section_id; + int64_t start_ts, end_ts; SaveStateEntry *se; char idstr[256]; int ret; @@ -2588,12 +2604,23 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) return -EINVAL; } + if (trace_downtime) { + start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + } + ret = vmstate_load(f, se); if (ret < 0) { error_report("error while loading state for instance 0x%"PRIx32" of" " device '%s'", instance_id, idstr); return ret; } + + if (trace_downtime) { + end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + trace_vmstate_downtime_load("non-iterable", se->idstr, + se->instance_id, end_ts - start_ts); + } + if (!check_section_footer(f, se)) { return -EINVAL; } @@ -2602,8 +2629,11 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) } static int -qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis) +qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis, + uint8_t type) { + bool trace_downtime = (type == QEMU_VM_SECTION_END); + int64_t start_ts, end_ts; uint32_t section_id; SaveStateEntry *se; int ret; @@ -2628,12 +2658,23 @@ qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis) return -EINVAL; } + if (trace_downtime) { + start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + } + ret = vmstate_load(f, se); if (ret < 0) { error_report("error while loading state section id %d(%s)", section_id, se->idstr); return ret; } + + if (trace_downtime) { + end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + trace_vmstate_downtime_load("iterable", se->idstr, + se->instance_id, end_ts - start_ts); + } + if (!check_section_footer(f, se)) { return -EINVAL; } @@ -2822,14 +2863,14 @@ retry: switch (section_type) { case QEMU_VM_SECTION_START: case QEMU_VM_SECTION_FULL: - ret = qemu_loadvm_section_start_full(f, mis); + ret = qemu_loadvm_section_start_full(f, mis, section_type); if (ret < 0) { goto out; } break; case QEMU_VM_SECTION_PART: case QEMU_VM_SECTION_END: - ret = qemu_loadvm_section_part_end(f, mis); + ret = qemu_loadvm_section_part_end(f, mis, section_type); if (ret < 0) { goto out; } diff --git a/migration/trace-events b/migration/trace-events index fa9486dffe..5820add1f3 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -48,6 +48,8 @@ savevm_state_cleanup(void) "" savevm_state_complete_precopy(void) "" vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s" vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s" +vmstate_downtime_save(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%d downtime=%"PRIi64 +vmstate_downtime_load(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%d downtime=%"PRIi64 postcopy_pause_incoming(void) "" postcopy_pause_incoming_continued(void) "" postcopy_page_req_sync(void *host_addr) "sync page req %p" From 93bdf888fa5c1b3972e337c8c8fbe7d2bcd29900 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 30 Oct 2023 12:33:45 -0400 Subject: [PATCH 18/40] migration: migration_stop_vm() helper Provide a helper for non-COLO use case of migration to stop a VM. This prepares for adding some downtime relevant tracepoints to migration, where they may or may not apply to COLO. Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231030163346.765724-5-peterx@redhat.com> --- migration/migration.c | 11 ++++++++--- migration/migration.h | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 8aac0c753e..3e38294485 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -149,6 +149,11 @@ static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) return (a > b) - (a < b); } +int migration_stop_vm(RunState state) +{ + return vm_stop_force_state(state); +} + void migration_object_init(void) { /* This can only be called once. */ @@ -2169,7 +2174,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); global_state_store(); - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + ret = migration_stop_vm(RUN_STATE_FINISH_MIGRATE); if (ret < 0) { goto fail; } @@ -2371,7 +2376,7 @@ static int migration_completion_precopy(MigrationState *s, s->vm_old_state = runstate_get(); global_state_store(); - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + ret = migration_stop_vm(RUN_STATE_FINISH_MIGRATE); trace_migration_completion_vm_stop(ret); if (ret < 0) { goto out_unlock; @@ -3222,7 +3227,7 @@ static void *bg_migration_thread(void *opaque) global_state_store(); /* Forcibly stop VM before saving state of vCPUs and devices */ - if (vm_stop_force_state(RUN_STATE_PAUSED)) { + if (migration_stop_vm(RUN_STATE_PAUSED)) { goto fail; } /* diff --git a/migration/migration.h b/migration/migration.h index ae82004892..5944107ad5 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -544,4 +544,6 @@ void migration_rp_wait(MigrationState *s); */ void migration_rp_kick(MigrationState *s); +int migration_stop_vm(RunState state); + #endif From 3e5f3bcdc281fed483b5bbe72050b63d4b41abf5 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 30 Oct 2023 12:33:46 -0400 Subject: [PATCH 19/40] migration: Add tracepoints for downtime checkpoints This patch is inspired by Joao Martin's patch here: https://lore.kernel.org/r/20230926161841.98464-1-joao.m.martins@oracle.com Add tracepoints for major downtime checkpoints on both src and dst. They share the same tracepoint with a string showing its stage. Besides the checkpoints in the previous patch, this patch also added destination checkpoints. On src, we have these checkpoints added: - src-downtime-start: right before vm stops on src - src-vm-stopped: after vm is fully stopped - src-iterable-saved: after all iterables saved (END sections) - src-non-iterable-saved: after all non-iterable saved (FULL sections) - src-downtime-stop: migration fully completed On dst, we have these checkpoints added: - dst-precopy-loadvm-completes: after loadvm all done for precopy - dst-precopy-bh-*: record BH steps to resume VM for precopy - dst-postcopy-bh-*: record BH steps to resume VM for postcopy On dst side, we don't have a good way to trace total time consumed by iterable or non-iterable for now. We can mark it by 1st time receiving a FULL / END section, but rather than that let's just rely on the other tracepoints added for vmstates to back up the information. With this patch, one can enable "vmstate_downtime*" tracepoints and it'll enable all tracepoints for downtime measurements necessary. Drop loadvm_postcopy_handle_run_bh() tracepoint alongside, because they service the same purpose, which was only for postcopy. We then have unified prefix for all downtime relevant tracepoints. Co-developed-by: Joao Martins Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231030163346.765724-6-peterx@redhat.com> --- migration/migration.c | 16 +++++++++++++++- migration/savevm.c | 14 +++++++++----- migration/trace-events | 2 +- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 3e38294485..c334b9effd 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -103,6 +103,7 @@ static int close_return_path_on_source(MigrationState *s); static void migration_downtime_start(MigrationState *s) { + trace_vmstate_downtime_checkpoint("src-downtime-start"); s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); } @@ -117,6 +118,8 @@ static void migration_downtime_end(MigrationState *s) if (!s->downtime) { s->downtime = now - s->downtime_start; } + + trace_vmstate_downtime_checkpoint("src-downtime-end"); } static bool migration_needs_multiple_sockets(void) @@ -151,7 +154,11 @@ static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) int migration_stop_vm(RunState state) { - return vm_stop_force_state(state); + int ret = vm_stop_force_state(state); + + trace_vmstate_downtime_checkpoint("src-vm-stopped"); + + return ret; } void migration_object_init(void) @@ -495,6 +502,8 @@ static void process_incoming_migration_bh(void *opaque) Error *local_err = NULL; MigrationIncomingState *mis = opaque; + trace_vmstate_downtime_checkpoint("dst-precopy-bh-enter"); + /* If capability late_block_activate is set: * Only fire up the block code now if we're going to restart the * VM, else 'cont' will do it. @@ -520,6 +529,8 @@ static void process_incoming_migration_bh(void *opaque) */ qemu_announce_self(&mis->announce_timer, migrate_announce_params()); + trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced"); + multifd_load_shutdown(); dirty_bitmap_mig_before_vm_start(); @@ -537,6 +548,7 @@ static void process_incoming_migration_bh(void *opaque) } else { runstate_set(global_state_get_runstate()); } + trace_vmstate_downtime_checkpoint("dst-precopy-bh-vm-started"); /* * This must happen after any state changes since as soon as an external * observer sees this event they might start to prod at the VM assuming @@ -571,6 +583,8 @@ process_incoming_migration_co(void *opaque) ret = qemu_loadvm_state(mis->from_src_file); mis->loadvm_co = NULL; + trace_vmstate_downtime_checkpoint("dst-precopy-loadvm-completed"); + ps = postcopy_state_get(); trace_process_incoming_migration_co_end(ret, ps); if (ps != POSTCOPY_INCOMING_NONE) { diff --git a/migration/savevm.c b/migration/savevm.c index 99ce42b6f3..bc98c2ea6f 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1526,6 +1526,8 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) end_ts_each - start_ts_each); } + trace_vmstate_downtime_checkpoint("src-iterable-saved"); + return 0; } @@ -1592,6 +1594,8 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, json_writer_free(vmdesc); ms->vmdesc = NULL; + trace_vmstate_downtime_checkpoint("src-non-iterable-saved"); + return 0; } @@ -2133,18 +2137,18 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) Error *local_err = NULL; MigrationIncomingState *mis = opaque; - trace_loadvm_postcopy_handle_run_bh("enter"); + trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter"); /* TODO we should move all of this lot into postcopy_ram.c or a shared code * in migration.c */ cpu_synchronize_all_post_init(); - trace_loadvm_postcopy_handle_run_bh("after cpu sync"); + trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cpu-synced"); qemu_announce_self(&mis->announce_timer, migrate_announce_params()); - trace_loadvm_postcopy_handle_run_bh("after announce"); + trace_vmstate_downtime_checkpoint("dst-postcopy-bh-announced"); /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ @@ -2155,7 +2159,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) autostart = false; } - trace_loadvm_postcopy_handle_run_bh("after invalidate cache"); + trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated"); dirty_bitmap_mig_before_vm_start(); @@ -2169,7 +2173,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) qemu_bh_delete(mis->bh); - trace_loadvm_postcopy_handle_run_bh("return"); + trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started"); } /* After all discards we can start running and asking for pages */ diff --git a/migration/trace-events b/migration/trace-events index 5820add1f3..e54f317e3b 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -17,7 +17,6 @@ loadvm_handle_recv_bitmap(char *s) "%s" loadvm_postcopy_handle_advise(void) "" loadvm_postcopy_handle_listen(const char *str) "%s" loadvm_postcopy_handle_run(void) "" -loadvm_postcopy_handle_run_bh(const char *str) "%s" loadvm_postcopy_handle_resume(void) "" loadvm_postcopy_ram_handle_discard(void) "" loadvm_postcopy_ram_handle_discard_end(void) "" @@ -50,6 +49,7 @@ vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s" vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s" vmstate_downtime_save(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%d downtime=%"PRIi64 vmstate_downtime_load(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%d downtime=%"PRIi64 +vmstate_downtime_checkpoint(const char *checkpoint) "%s" postcopy_pause_incoming(void) "" postcopy_pause_incoming_continued(void) "" postcopy_page_req_sync(void *host_addr) "sync page req %p" From eea1e5c9d6fac211cf3fe2d3d7f3be2bf767d421 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 25 Oct 2023 12:44:24 -0700 Subject: [PATCH 20/40] migration: mode parameter Create a mode migration parameter that can be used to select alternate migration algorithms. The default mode is normal, representing the current migration algorithm, and does not need to be explicitly set. No functional change until a new mode is added, except that the mode is shown by the 'info migrate' command. Signed-off-by: Steve Sistare Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <1698263069-406971-2-git-send-email-steven.sistare@oracle.com> --- hw/core/qdev-properties-system.c | 14 ++++++++++++++ include/hw/qdev-properties-system.h | 4 ++++ include/migration/misc.h | 1 + migration/migration-hmp-cmds.c | 9 +++++++++ migration/options.c | 21 +++++++++++++++++++++ migration/options.h | 1 + qapi/migration.json | 27 ++++++++++++++++++++++++--- 7 files changed, 74 insertions(+), 3 deletions(-) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 7c6dfab128..b9179e8917 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -673,6 +673,20 @@ const PropertyInfo qdev_prop_multifd_compression = { .set_default_value = qdev_propinfo_set_default_value_enum, }; +/* --- MigMode --- */ + +QEMU_BUILD_BUG_ON(sizeof(MigMode) != sizeof(int)); + +const PropertyInfo qdev_prop_mig_mode = { + .name = "MigMode", + .description = "mig_mode values, " + "normal", + .enum_table = &MigMode_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, + .set_default_value = qdev_propinfo_set_default_value_enum, +}; + /* --- Reserved Region --- */ /* diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h index e4f8a13afc..91f7a2452d 100644 --- a/include/hw/qdev-properties-system.h +++ b/include/hw/qdev-properties-system.h @@ -7,6 +7,7 @@ extern const PropertyInfo qdev_prop_chr; extern const PropertyInfo qdev_prop_macaddr; extern const PropertyInfo qdev_prop_reserved_region; extern const PropertyInfo qdev_prop_multifd_compression; +extern const PropertyInfo qdev_prop_mig_mode; extern const PropertyInfo qdev_prop_losttickpolicy; extern const PropertyInfo qdev_prop_blockdev_on_error; extern const PropertyInfo qdev_prop_bios_chs_trans; @@ -42,6 +43,9 @@ extern const PropertyInfo qdev_prop_cpus390entitlement; #define DEFINE_PROP_MULTIFD_COMPRESSION(_n, _s, _f, _d) \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_multifd_compression, \ MultiFDCompression) +#define DEFINE_PROP_MIG_MODE(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_mig_mode, \ + MigMode) #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \ LostTickPolicy) diff --git a/include/migration/misc.h b/include/migration/misc.h index 673ac490fb..1bc8902e6d 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -15,6 +15,7 @@ #define MIGRATION_MISC_H #include "qemu/notify.h" +#include "qapi/qapi-types-migration.h" #include "qapi/qapi-types-net.h" /* migration/ram.c */ diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index dfe98da355..a170440991 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -387,6 +387,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, "%s: %" PRIu64 " MB/s\n", MigrationParameter_str(MIGRATION_PARAMETER_VCPU_DIRTY_LIMIT), params->vcpu_dirty_limit); + + assert(params->has_mode); + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_MODE), + qapi_enum_lookup(&MigMode_lookup, params->mode)); } qapi_free_MigrationParameters(params); @@ -661,6 +666,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) p->has_vcpu_dirty_limit = true; visit_type_size(v, param, &p->vcpu_dirty_limit, &err); break; + case MIGRATION_PARAMETER_MODE: + p->has_mode = true; + visit_type_MigMode(v, param, &p->mode, &err); + break; default: assert(0); } diff --git a/migration/options.c b/migration/options.c index 9a39826ca5..8d8ec73ad9 100644 --- a/migration/options.c +++ b/migration/options.c @@ -176,6 +176,9 @@ Property migration_properties[] = { DEFINE_PROP_UINT64("vcpu-dirty-limit", MigrationState, parameters.vcpu_dirty_limit, DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT), + DEFINE_PROP_MIG_MODE("mode", MigrationState, + parameters.mode, + MIG_MODE_NORMAL), /* Migration capabilities */ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), @@ -827,6 +830,13 @@ uint64_t migrate_max_postcopy_bandwidth(void) return s->parameters.max_postcopy_bandwidth; } +MigMode migrate_mode(void) +{ + MigrationState *s = migrate_get_current(); + + return s->parameters.mode; +} + int migrate_multifd_channels(void) { MigrationState *s = migrate_get_current(); @@ -999,6 +1009,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->x_vcpu_dirty_limit_period = s->parameters.x_vcpu_dirty_limit_period; params->has_vcpu_dirty_limit = true; params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit; + params->has_mode = true; + params->mode = s->parameters.mode; return params; } @@ -1034,6 +1046,7 @@ void migrate_params_init(MigrationParameters *params) params->has_announce_step = true; params->has_x_vcpu_dirty_limit_period = true; params->has_vcpu_dirty_limit = true; + params->has_mode = true; } /* @@ -1331,6 +1344,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_vcpu_dirty_limit) { dest->vcpu_dirty_limit = params->vcpu_dirty_limit; } + + if (params->has_mode) { + dest->mode = params->mode; + } } static void migrate_params_apply(MigrateSetParameters *params, Error **errp) @@ -1471,6 +1488,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) if (params->has_vcpu_dirty_limit) { s->parameters.vcpu_dirty_limit = params->vcpu_dirty_limit; } + + if (params->has_mode) { + s->parameters.mode = params->mode; + } } void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) diff --git a/migration/options.h b/migration/options.h index 237f2d6b4a..246c160aee 100644 --- a/migration/options.h +++ b/migration/options.h @@ -83,6 +83,7 @@ uint8_t migrate_max_cpu_throttle(void); uint64_t migrate_max_bandwidth(void); uint64_t migrate_avail_switchover_bandwidth(void); uint64_t migrate_max_postcopy_bandwidth(void); +MigMode migrate_mode(void); int migrate_multifd_channels(void); MultiFDCompression migrate_multifd_compression(void); int migrate_multifd_zlib_level(void); diff --git a/qapi/migration.json b/qapi/migration.json index e6610af428..47c02a9346 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -631,6 +631,15 @@ 'data': [ 'none', 'zlib', { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] } +## +# @MigMode: +# +# @normal: the original form of migration. (since 8.2) +# +## +{ 'enum': 'MigMode', + 'data': [ 'normal' ] } + ## # @BitmapMigrationBitmapAliasTransform: # @@ -849,6 +858,9 @@ # @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration. # Defaults to 1. (Since 8.1) # +# @mode: Migration mode. See description in @MigMode. Default is 'normal'. +# (Since 8.2) +# # Features: # # @deprecated: Member @block-incremental is deprecated. Use @@ -881,7 +893,8 @@ 'multifd-zlib-level', 'multifd-zstd-level', 'block-bitmap-mapping', { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, - 'vcpu-dirty-limit'] } + 'vcpu-dirty-limit', + 'mode'] } ## # @MigrateSetParameters: @@ -1033,6 +1046,9 @@ # @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration. # Defaults to 1. (Since 8.1) # +# @mode: Migration mode. See description in @MigMode. Default is 'normal'. +# (Since 8.2) +# # Features: # # @deprecated: Member @block-incremental is deprecated. Use @@ -1085,7 +1101,8 @@ '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ], '*x-vcpu-dirty-limit-period': { 'type': 'uint64', 'features': [ 'unstable' ] }, - '*vcpu-dirty-limit': 'uint64'} } + '*vcpu-dirty-limit': 'uint64', + '*mode': 'MigMode'} } ## # @migrate-set-parameters: @@ -1257,6 +1274,9 @@ # @vcpu-dirty-limit: Dirtyrate limit (MB/s) during live migration. # Defaults to 1. (Since 8.1) # +# @mode: Migration mode. See description in @MigMode. Default is 'normal'. +# (Since 8.2) +# # Features: # # @deprecated: Member @block-incremental is deprecated. Use @@ -1306,7 +1326,8 @@ '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ], '*x-vcpu-dirty-limit-period': { 'type': 'uint64', 'features': [ 'unstable' ] }, - '*vcpu-dirty-limit': 'uint64'} } + '*vcpu-dirty-limit': 'uint64', + '*mode': 'MigMode'} } ## # @query-migrate-parameters: From fa3673e497a1119fbf2c1f948da71907a84385d9 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 25 Oct 2023 12:44:25 -0700 Subject: [PATCH 21/40] migration: per-mode blockers Extend the blocker interface so that a blocker can be registered for one or more migration modes. The existing interfaces register a blocker for all modes, and the new interfaces take a varargs list of modes. Internally, maintain a separate blocker list per mode. The same Error object may be added to multiple lists. When a block is deleted, it is removed from every list, and the Error is freed. No functional change until a new mode is added. Signed-off-by: Steve Sistare Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <1698263069-406971-3-git-send-email-steven.sistare@oracle.com> --- include/migration/blocker.h | 44 ++++++++++++++-- migration/migration.c | 101 ++++++++++++++++++++++++++++++------ stubs/migr-blocker.c | 10 ++++ 3 files changed, 135 insertions(+), 20 deletions(-) diff --git a/include/migration/blocker.h b/include/migration/blocker.h index b048f301b4..a687ac0efe 100644 --- a/include/migration/blocker.h +++ b/include/migration/blocker.h @@ -14,8 +14,12 @@ #ifndef MIGRATION_BLOCKER_H #define MIGRATION_BLOCKER_H +#include "qapi/qapi-types-migration.h" + +#define MIG_MODE_ALL MIG_MODE__MAX + /** - * @migrate_add_blocker - prevent migration from proceeding + * @migrate_add_blocker - prevent all modes of migration from proceeding * * @reasonp - address of an error to be returned whenever migration is attempted * @@ -30,8 +34,8 @@ int migrate_add_blocker(Error **reasonp, Error **errp); /** - * @migrate_add_blocker_internal - prevent migration from proceeding without - * only-migrate implications + * @migrate_add_blocker_internal - prevent all modes of migration from + * proceeding, but ignore -only-migratable * * @reasonp - address of an error to be returned whenever migration is attempted * @@ -50,7 +54,7 @@ int migrate_add_blocker(Error **reasonp, Error **errp); int migrate_add_blocker_internal(Error **reasonp, Error **errp); /** - * @migrate_del_blocker - remove a blocking error from migration and free it. + * @migrate_del_blocker - remove a migration blocker from all modes and free it. * * @reasonp - address of the error blocking migration * @@ -58,4 +62,36 @@ int migrate_add_blocker_internal(Error **reasonp, Error **errp); */ void migrate_del_blocker(Error **reasonp); +/** + * @migrate_add_blocker_normal - prevent normal migration mode from proceeding + * + * @reasonp - address of an error to be returned whenever migration is attempted + * + * @errp - [out] The reason (if any) we cannot block migration right now. + * + * @returns - 0 on success, -EBUSY/-EACCES on failure, with errp set. + * + * *@reasonp is freed and set to NULL if failure is returned. + * On success, the caller must not free @reasonp, except by + * calling migrate_del_blocker. + */ +int migrate_add_blocker_normal(Error **reasonp, Error **errp); + +/** + * @migrate_add_blocker_modes - prevent some modes of migration from proceeding + * + * @reasonp - address of an error to be returned whenever migration is attempted + * + * @errp - [out] The reason (if any) we cannot block migration right now. + * + * @mode - one or more migration modes to be blocked. The list is terminated + * by -1 or MIG_MODE_ALL. For the latter, all modes are blocked. + * + * @returns - 0 on success, -EBUSY/-EACCES on failure, with errp set. + * + * *@reasonp is freed and set to NULL if failure is returned. + * On success, the caller must not free *@reasonp before the blocker is removed. + */ +int migrate_add_blocker_modes(Error **reasonp, Error **errp, MigMode mode, ...); + #endif diff --git a/migration/migration.c b/migration/migration.c index c334b9effd..11c1490090 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -92,7 +92,7 @@ enum mig_rp_message_type { static MigrationState *current_migration; static MigrationIncomingState *current_incoming; -static GSList *migration_blockers; +static GSList *migration_blockers[MIG_MODE__MAX]; static bool migration_object_check(MigrationState *ms, Error **errp); static int migration_maybe_pause(MigrationState *s, @@ -1043,7 +1043,7 @@ static void fill_source_migration_info(MigrationInfo *info) { MigrationState *s = migrate_get_current(); int state = qatomic_read(&s->state); - GSList *cur_blocker = migration_blockers; + GSList *cur_blocker = migration_blockers[migrate_mode()]; info->blocked_reasons = NULL; @@ -1507,38 +1507,105 @@ int migrate_init(MigrationState *s, Error **errp) return 0; } -int migrate_add_blocker_internal(Error **reasonp, Error **errp) +static bool is_busy(Error **reasonp, Error **errp) { + ERRP_GUARD(); + /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */ if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { error_propagate_prepend(errp, *reasonp, "disallowing migration blocker " "(migration/snapshot in progress) for: "); *reasonp = NULL; - return -EBUSY; + return true; } + return false; +} - migration_blockers = g_slist_prepend(migration_blockers, *reasonp); +static bool is_only_migratable(Error **reasonp, Error **errp, int modes) +{ + ERRP_GUARD(); + + if (only_migratable && (modes & BIT(MIG_MODE_NORMAL))) { + error_propagate_prepend(errp, *reasonp, + "disallowing migration blocker " + "(--only-migratable) for: "); + *reasonp = NULL; + return true; + } + return false; +} + +static int get_modes(MigMode mode, va_list ap) +{ + int modes = 0; + + while (mode != -1 && mode != MIG_MODE_ALL) { + assert(mode >= MIG_MODE_NORMAL && mode < MIG_MODE__MAX); + modes |= BIT(mode); + mode = va_arg(ap, MigMode); + } + if (mode == MIG_MODE_ALL) { + modes = BIT(MIG_MODE__MAX) - 1; + } + return modes; +} + +static int add_blockers(Error **reasonp, Error **errp, int modes) +{ + for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) { + if (modes & BIT(mode)) { + migration_blockers[mode] = g_slist_prepend(migration_blockers[mode], + *reasonp); + } + } return 0; } int migrate_add_blocker(Error **reasonp, Error **errp) { - if (only_migratable) { - error_propagate_prepend(errp, *reasonp, - "disallowing migration blocker " - "(--only-migratable) for: "); - *reasonp = NULL; - return -EACCES; - } + return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_ALL); +} - return migrate_add_blocker_internal(reasonp, errp); +int migrate_add_blocker_normal(Error **reasonp, Error **errp) +{ + return migrate_add_blocker_modes(reasonp, errp, MIG_MODE_NORMAL, -1); +} + +int migrate_add_blocker_modes(Error **reasonp, Error **errp, MigMode mode, ...) +{ + int modes; + va_list ap; + + va_start(ap, mode); + modes = get_modes(mode, ap); + va_end(ap); + + if (is_only_migratable(reasonp, errp, modes)) { + return -EACCES; + } else if (is_busy(reasonp, errp)) { + return -EBUSY; + } + return add_blockers(reasonp, errp, modes); +} + +int migrate_add_blocker_internal(Error **reasonp, Error **errp) +{ + int modes = BIT(MIG_MODE__MAX) - 1; + + if (is_busy(reasonp, errp)) { + return -EBUSY; + } + return add_blockers(reasonp, errp, modes); } void migrate_del_blocker(Error **reasonp) { if (*reasonp) { - migration_blockers = g_slist_remove(migration_blockers, *reasonp); + for (MigMode mode = 0; mode < MIG_MODE__MAX; mode++) { + migration_blockers[mode] = g_slist_remove(migration_blockers[mode], + *reasonp); + } error_free(*reasonp); *reasonp = NULL; } @@ -1634,12 +1701,14 @@ void qmp_migrate_pause(Error **errp) bool migration_is_blocked(Error **errp) { + GSList *blockers = migration_blockers[migrate_mode()]; + if (qemu_savevm_state_blocked(errp)) { return true; } - if (migration_blockers) { - error_propagate(errp, error_copy(migration_blockers->data)); + if (blockers) { + error_propagate(errp, error_copy(blockers->data)); return true; } diff --git a/stubs/migr-blocker.c b/stubs/migr-blocker.c index 17a5dbf87b..11cbff268f 100644 --- a/stubs/migr-blocker.c +++ b/stubs/migr-blocker.c @@ -6,6 +6,16 @@ int migrate_add_blocker(Error **reasonp, Error **errp) return 0; } +int migrate_add_blocker_normal(Error **reasonp, Error **errp) +{ + return 0; +} + +int migrate_add_blocker_modes(Error **reasonp, Error **errp, MigMode mode, ...) +{ + return 0; +} + void migrate_del_blocker(Error **reasonp) { } From e0ee3a8ff74a47ae8ca147d8192e2c0745dfb51d Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 25 Oct 2023 12:44:26 -0700 Subject: [PATCH 22/40] cpr: relax blockdev migration blockers Some blockdevs block migration because they do not support sharing across hosts and/or do not support dirty bitmaps. These prohibitions do not apply if the old and new qemu processes do not run concurrently, and if new qemu starts on the same host as old, which is the case for cpr. Narrow the scope of these blockers so they only apply to normal mode. They will not block cpr modes when they are added in subsequent patches. No functional change until a new mode is added. Signed-off-by: Steve Sistare Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <1698263069-406971-4-git-send-email-steven.sistare@oracle.com> --- block/parallels.c | 2 +- block/qcow.c | 2 +- block/vdi.c | 2 +- block/vhdx.c | 2 +- block/vmdk.c | 2 +- block/vpc.c | 2 +- block/vvfat.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 1d695ce7fb..6318dd02e7 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1369,7 +1369,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(&s->migration_blocker, errp); + ret = migrate_add_blocker_normal(&s->migration_blocker, errp); if (ret < 0) { goto fail; } diff --git a/block/qcow.c b/block/qcow.c index fdd4c83948..eab68e387c 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -307,7 +307,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(&s->migration_blocker, errp); + ret = migrate_add_blocker_normal(&s->migration_blocker, errp); if (ret < 0) { goto fail; } diff --git a/block/vdi.c b/block/vdi.c index fd7e365383..c647d72895 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -498,7 +498,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(&s->migration_blocker, errp); + ret = migrate_add_blocker_normal(&s->migration_blocker, errp); if (ret < 0) { goto fail_free_bmap; } diff --git a/block/vhdx.c b/block/vhdx.c index e37f8c0926..a9d08742f9 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1096,7 +1096,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, error_setg(&s->migration_blocker, "The vhdx format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); - ret = migrate_add_blocker(&s->migration_blocker, errp); + ret = migrate_add_blocker_normal(&s->migration_blocker, errp); if (ret < 0) { goto fail; } diff --git a/block/vmdk.c b/block/vmdk.c index 1335d39e16..85864b8045 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1386,7 +1386,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, error_setg(&s->migration_blocker, "The vmdk format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); - ret = migrate_add_blocker(&s->migration_blocker, errp); + ret = migrate_add_blocker_normal(&s->migration_blocker, errp); if (ret < 0) { goto fail; } diff --git a/block/vpc.c b/block/vpc.c index c30cf8689a..aa1a48ae0e 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -452,7 +452,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(&s->migration_blocker, errp); + ret = migrate_add_blocker_normal(&s->migration_blocker, errp); if (ret < 0) { goto fail; } diff --git a/block/vvfat.c b/block/vvfat.c index 266e036dcd..9d050ba3ae 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1268,7 +1268,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, "The vvfat (rw) format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); - ret = migrate_add_blocker(&s->migration_blocker, errp); + ret = migrate_add_blocker_normal(&s->migration_blocker, errp); if (ret < 0) { goto fail; } From 89415796f688036faa88e6dc9ae2f3acfcdc39c8 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 25 Oct 2023 12:44:27 -0700 Subject: [PATCH 23/40] cpr: relax vhost migration blockers vhost blocks migration if logging is not supported to track dirty memory, and vhost-user blocks it if the log cannot be saved to a shm fd. vhost-vdpa blocks migration if both hosts do not support all the device's features using a shadow VQ, for tracking requests and dirty memory. vhost-scsi blocks migration if storage cannot be shared across hosts, or if state cannot be migrated. None of these conditions apply if the old and new qemu processes do not run concurrently, and if new qemu starts on the same host as old, which is the case for cpr. Narrow the scope of these blockers so they only apply to normal mode. They will not block cpr modes when they are added in subsequent patches. No functional change until a new mode is added. Signed-off-by: Steve Sistare Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <1698263069-406971-5-git-send-email-steven.sistare@oracle.com> --- hw/scsi/vhost-scsi.c | 2 +- hw/virtio/vhost.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 5d9e06a9bb..3126df9e1d 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -210,7 +210,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) "When external environment supports it (Orchestrator migrates " "target SCSI device state or use shared storage over network), " "set 'migratable' property to true to enable migration."); - if (migrate_add_blocker(&vsc->migration_blocker, errp) < 0) { + if (migrate_add_blocker_normal(&vsc->migration_blocker, errp) < 0) { goto free_virtio; } } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index aa7b272452..9c9ae7109e 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1527,7 +1527,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, } if (hdev->migration_blocker != NULL) { - r = migrate_add_blocker(&hdev->migration_blocker, errp); + r = migrate_add_blocker_normal(&hdev->migration_blocker, errp); if (r < 0) { goto fail_busyloop; } From a87e64519b027c9b36d9badbd73b13f6475dfb3d Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 25 Oct 2023 12:44:28 -0700 Subject: [PATCH 24/40] cpr: reboot mode Add the cpr-reboot migration mode. Usage: $ qemu-system-$arch -monitor stdio ... QEMU 8.1.50 monitor - type 'help' for more information (qemu) migrate_set_capability x-ignore-shared on (qemu) migrate_set_parameter mode cpr-reboot (qemu) migrate -d file:vm.state (qemu) info status VM status: paused (postmigrate) (qemu) quit $ qemu-system-$arch -monitor stdio -incoming defer ... QEMU 8.1.50 monitor - type 'help' for more information (qemu) migrate_set_capability x-ignore-shared on (qemu) migrate_set_parameter mode cpr-reboot (qemu) migrate_incoming file:vm.state (qemu) info status VM status: running In this mode, the migrate command saves state to a file, allowing one to quit qemu, reboot to an updated kernel, and restart an updated version of qemu. The caller must specify a migration URI that writes to and reads from a file. Unlike normal mode, the use of certain local storage options does not block the migration, but the caller must not modify guest block devices between the quit and restart. To avoid saving guest RAM to the file, the memory backend must be shared, and the @x-ignore-shared migration capability must be set. Guest RAM must be non-volatile across reboot, such as by backing it with a dax device, but this is not enforced. The restarted qemu arguments must match those used to initially start qemu, plus the -incoming option. Signed-off-by: Steve Sistare Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <1698263069-406971-6-git-send-email-steven.sistare@oracle.com> --- hw/core/qdev-properties-system.c | 2 +- qapi/migration.json | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index b9179e8917..2f1dbb3fd7 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -680,7 +680,7 @@ QEMU_BUILD_BUG_ON(sizeof(MigMode) != sizeof(int)); const PropertyInfo qdev_prop_mig_mode = { .name = "MigMode", .description = "mig_mode values, " - "normal", + "normal,cpr-reboot", .enum_table = &MigMode_lookup, .get = qdev_propinfo_get_enum, .set = qdev_propinfo_set_enum, diff --git a/qapi/migration.json b/qapi/migration.json index 47c02a9346..3daeffc95d 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -636,9 +636,22 @@ # # @normal: the original form of migration. (since 8.2) # +# @cpr-reboot: The migrate command saves state to a file, allowing one to +# quit qemu, reboot to an updated kernel, and restart an updated +# version of qemu. The caller must specify a migration URI +# that writes to and reads from a file. Unlike normal mode, +# the use of certain local storage options does not block the +# migration, but the caller must not modify guest block devices +# between the quit and restart. To avoid saving guest RAM to the +# file, the memory backend must be shared, and the @x-ignore-shared +# migration capability must be set. Guest RAM must be non-volatile +# across reboot, such as by backing it with a dax device, but this +# is not enforced. The restarted qemu arguments must match those +# used to initially start qemu, plus the -incoming option. +# (since 8.2) ## { 'enum': 'MigMode', - 'data': [ 'normal' ] } + 'data': [ 'normal', 'cpr-reboot' ] } ## # @BitmapMigrationBitmapAliasTransform: From e7b428d6bc06a0dfffa7e47fb3b3ee47f1c93499 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 25 Oct 2023 12:44:29 -0700 Subject: [PATCH 25/40] tests/qtest: migration: add reboot mode test [ Maintainer note: I put the test as flaky because our CI has problems with shared memory. We will remove the flaky bits as soon as we get a solution. ] Signed-off-by: Steve Sistare Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <1698263069-406971-7-git-send-email-steven.sistare@oracle.com> --- tests/qtest/migration-test.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index bc70a14642..b7ebc23903 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -2026,6 +2026,31 @@ static void test_precopy_file_offset_bad(void) test_file_common(&args, false); } +static void *test_mode_reboot_start(QTestState *from, QTestState *to) +{ + migrate_set_parameter_str(from, "mode", "cpr-reboot"); + migrate_set_parameter_str(to, "mode", "cpr-reboot"); + + migrate_set_capability(from, "x-ignore-shared", true); + migrate_set_capability(to, "x-ignore-shared", true); + + return NULL; +} + +static void test_mode_reboot(void) +{ + g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs, + FILE_TEST_FILENAME); + MigrateCommon args = { + .start.use_shmem = true, + .connect_uri = uri, + .listen_uri = "defer", + .start_hook = test_mode_reboot_start + }; + + test_file_common(&args, true); +} + static void test_precopy_tcp_plain(void) { MigrateCommon args = { @@ -3096,6 +3121,14 @@ int main(int argc, char **argv) qtest_add_func("/migration/precopy/file/offset/bad", test_precopy_file_offset_bad); + /* + * Our CI system has problems with shared memory. + * Don't run this test until we find a workaround. + */ + if (getenv("QEMU_TEST_FLAKY_TESTS")) { + qtest_add_func("/migration/mode/reboot", test_mode_reboot); + } + #ifdef CONFIG_GNUTLS qtest_add_func("/migration/precopy/unix/tls/psk", test_precopy_unix_tls_psk); From 7aa6070d09c5a6c83490599a3c564c64d7e2520a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 17 Oct 2023 16:26:29 -0400 Subject: [PATCH 26/40] migration: Refactor error handling in source return path rp_state.error was a boolean used to show error happened in return path thread. That's not only duplicating error reporting (migrate_set_error), but also not good enough in that we only do error_report() and set it to true, we never can keep a history of the exact error and show it in query-migrate. To make this better, a few things done: - Use error_setg() rather than error_report() across the whole lifecycle of return path thread, keeping the error in an Error*. - With above, no need to have mark_source_rp_bad(), remove it, alongside with rp_state.error itself. - Use migrate_set_error() to apply that captured error to the global migration object when error occured in this thread. - Do the same when detected qemufile error in source return path We need to re-export qemu_file_get_error_obj() to do the last one. Signed-off-by: Peter Xu Reviewed-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231017202633.296756-2-peterx@redhat.com> --- migration/migration.c | 121 ++++++++++++++++++----------------------- migration/migration.h | 1 - migration/qemu-file.c | 2 +- migration/qemu-file.h | 1 + migration/ram.c | 41 +++++++------- migration/ram.h | 5 +- migration/trace-events | 4 +- 7 files changed, 80 insertions(+), 95 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 11c1490090..455ddc896a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -99,7 +99,7 @@ static int migration_maybe_pause(MigrationState *s, int *current_active_state, int new_state); static void migrate_fd_cancel(MigrationState *s); -static int close_return_path_on_source(MigrationState *s); +static bool close_return_path_on_source(MigrationState *s); static void migration_downtime_start(MigrationState *s) { @@ -1475,7 +1475,6 @@ int migrate_init(MigrationState *s, Error **errp) s->to_dst_file = NULL; s->state = MIGRATION_STATUS_NONE; s->rp_state.from_dst_file = NULL; - s->rp_state.error = false; s->mbps = 0.0; s->pages_per_second = 0.0; s->downtime = 0; @@ -1883,16 +1882,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) qemu_sem_post(&s->pause_sem); } -/* migration thread support */ -/* - * Something bad happened to the RP stream, mark an error - * The caller shall print or trace something to indicate why - */ -static void mark_source_rp_bad(MigrationState *s) -{ - s->rp_state.error = true; -} - void migration_rp_wait(MigrationState *s) { qemu_sem_wait(&s->rp_state.rp_sem); @@ -1923,8 +1912,9 @@ static struct rp_cmd_args { * We're allowed to send more than requested (e.g. to round to our page size) * and we don't need to send pages that have already been sent. */ -static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, - ram_addr_t start, size_t len) +static void +migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, + ram_addr_t start, size_t len, Error **errp) { long our_host_ps = qemu_real_host_page_size(); @@ -1936,37 +1926,36 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, */ if (!QEMU_IS_ALIGNED(start, our_host_ps) || !QEMU_IS_ALIGNED(len, our_host_ps)) { - error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT - " len: %zd", __func__, start, len); - mark_source_rp_bad(ms); + error_setg(errp, "MIG_RP_MSG_REQ_PAGES: Misaligned page request, start:" + RAM_ADDR_FMT " len: %zd", start, len); return; } - if (ram_save_queue_pages(rbname, start, len)) { - mark_source_rp_bad(ms); - } + ram_save_queue_pages(rbname, start, len, errp); } -static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) +static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name, + Error **errp) { RAMBlock *block = qemu_ram_block_by_name(block_name); if (!block) { - error_report("%s: invalid block name '%s'", __func__, block_name); + error_setg(errp, "MIG_RP_MSG_RECV_BITMAP has invalid block name '%s'", + block_name); return -EINVAL; } /* Fetch the received bitmap and refresh the dirty bitmap */ - return ram_dirty_bitmap_reload(s, block); + return ram_dirty_bitmap_reload(s, block, errp); } -static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) +static int migrate_handle_rp_resume_ack(MigrationState *s, + uint32_t value, Error **errp) { trace_source_return_path_thread_resume_ack(value); if (value != MIGRATION_RESUME_ACK_VALUE) { - error_report("%s: illegal resume_ack value %"PRIu32, - __func__, value); + error_setg(errp, "illegal resume_ack value %"PRIu32, value); return -1; } @@ -2025,48 +2014,46 @@ static void *source_return_path_thread(void *opaque) uint32_t tmp32, sibling_error; ram_addr_t start = 0; /* =0 to silence warning */ size_t len = 0, expected_len; + Error *err = NULL; int res; trace_source_return_path_thread_entry(); rcu_register_thread(); - while (!ms->rp_state.error && !qemu_file_get_error(rp) && - migration_is_setup_or_active(ms->state)) { + while (migration_is_setup_or_active(ms->state)) { trace_source_return_path_thread_loop_top(); + header_type = qemu_get_be16(rp); header_len = qemu_get_be16(rp); if (qemu_file_get_error(rp)) { - mark_source_rp_bad(ms); + qemu_file_get_error_obj(rp, &err); goto out; } if (header_type >= MIG_RP_MSG_MAX || header_type == MIG_RP_MSG_INVALID) { - error_report("RP: Received invalid message 0x%04x length 0x%04x", - header_type, header_len); - mark_source_rp_bad(ms); + error_setg(&err, "Received invalid message 0x%04x length 0x%04x", + header_type, header_len); goto out; } if ((rp_cmd_args[header_type].len != -1 && header_len != rp_cmd_args[header_type].len) || header_len > sizeof(buf)) { - error_report("RP: Received '%s' message (0x%04x) with" - "incorrect length %d expecting %zu", - rp_cmd_args[header_type].name, header_type, header_len, - (size_t)rp_cmd_args[header_type].len); - mark_source_rp_bad(ms); + error_setg(&err, "Received '%s' message (0x%04x) with" + "incorrect length %d expecting %zu", + rp_cmd_args[header_type].name, header_type, header_len, + (size_t)rp_cmd_args[header_type].len); goto out; } /* We know we've got a valid header by this point */ res = qemu_get_buffer(rp, buf, header_len); if (res != header_len) { - error_report("RP: Failed reading data for message 0x%04x" - " read %d expected %d", - header_type, res, header_len); - mark_source_rp_bad(ms); + error_setg(&err, "Failed reading data for message 0x%04x" + " read %d expected %d", + header_type, res, header_len); goto out; } @@ -2076,8 +2063,7 @@ static void *source_return_path_thread(void *opaque) sibling_error = ldl_be_p(buf); trace_source_return_path_thread_shut(sibling_error); if (sibling_error) { - error_report("RP: Sibling indicated error %d", sibling_error); - mark_source_rp_bad(ms); + error_setg(&err, "Sibling indicated error %d", sibling_error); } /* * We'll let the main thread deal with closing the RP @@ -2095,7 +2081,10 @@ static void *source_return_path_thread(void *opaque) case MIG_RP_MSG_REQ_PAGES: start = ldq_be_p(buf); len = ldl_be_p(buf + 8); - migrate_handle_rp_req_pages(ms, NULL, start, len); + migrate_handle_rp_req_pages(ms, NULL, start, len, &err); + if (err) { + goto out; + } break; case MIG_RP_MSG_REQ_PAGES_ID: @@ -2110,32 +2099,32 @@ static void *source_return_path_thread(void *opaque) expected_len += tmp32; } if (header_len != expected_len) { - error_report("RP: Req_Page_id with length %d expecting %zd", - header_len, expected_len); - mark_source_rp_bad(ms); + error_setg(&err, "Req_Page_id with length %d expecting %zd", + header_len, expected_len); + goto out; + } + migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len, + &err); + if (err) { goto out; } - migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); break; case MIG_RP_MSG_RECV_BITMAP: if (header_len < 1) { - error_report("%s: missing block name", __func__); - mark_source_rp_bad(ms); + error_setg(&err, "MIG_RP_MSG_RECV_BITMAP missing block name"); goto out; } /* Format: len (1B) + idstr (<255B). This ends the idstr. */ buf[buf[0] + 1] = '\0'; - if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { - mark_source_rp_bad(ms); + if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) { goto out; } break; case MIG_RP_MSG_RESUME_ACK: tmp32 = ldl_be_p(buf); - if (migrate_handle_rp_resume_ack(ms, tmp32)) { - mark_source_rp_bad(ms); + if (migrate_handle_rp_resume_ack(ms, tmp32, &err)) { goto out; } break; @@ -2151,13 +2140,15 @@ static void *source_return_path_thread(void *opaque) } out: - if (qemu_file_get_error(rp)) { + if (err) { + migrate_set_error(ms, err); + error_free(err); trace_source_return_path_thread_bad_end(); - mark_source_rp_bad(ms); } trace_source_return_path_thread_end(); rcu_unregister_thread(); + return NULL; } @@ -2179,12 +2170,11 @@ static int open_return_path_on_source(MigrationState *ms) return 0; } -static int close_return_path_on_source(MigrationState *ms) +/* Return true if error detected, or false otherwise */ +static bool close_return_path_on_source(MigrationState *ms) { - int ret; - if (!ms->rp_state.rp_thread_created) { - return 0; + return false; } trace_migration_return_path_end_before(); @@ -2202,18 +2192,13 @@ static int close_return_path_on_source(MigrationState *ms) } } - trace_await_return_path_close_on_source_joining(); qemu_thread_join(&ms->rp_state.rp_thread); ms->rp_state.rp_thread_created = false; - trace_await_return_path_close_on_source_close(); - - ret = ms->rp_state.error; - ms->rp_state.error = false; - migration_release_dst_files(ms); + trace_migration_return_path_end_after(); - trace_migration_return_path_end_after(ret); - return ret; + /* Return path will persist the error in MigrationState when quit */ + return migrate_has_error(ms); } static inline void diff --git a/migration/migration.h b/migration/migration.h index 5944107ad5..615b517594 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -308,7 +308,6 @@ struct MigrationState { /* Protected by qemu_file_lock */ QEMUFile *from_dst_file; QemuThread rp_thread; - bool error; /* * We can also check non-zero of rp_thread, but there's no "official" * way to do this, so this bool makes it slightly more elegant. diff --git a/migration/qemu-file.c b/migration/qemu-file.c index d64500310d..94231ff295 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -137,7 +137,7 @@ QEMUFile *qemu_file_new_input(QIOChannel *ioc) * * If errp is specified, a verbose error message will be copied over. */ -static int qemu_file_get_error_obj(QEMUFile *f, Error **errp) +int qemu_file_get_error_obj(QEMUFile *f, Error **errp) { if (!f->last_error) { return 0; diff --git a/migration/qemu-file.h b/migration/qemu-file.h index 1774116f79..8aec9fabf7 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -68,6 +68,7 @@ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset); void qemu_file_skip(QEMUFile *f, int size); int qemu_file_get_error_obj_any(QEMUFile *f1, QEMUFile *f2, Error **errp); void qemu_file_set_error_obj(QEMUFile *f, int ret, Error *err); +int qemu_file_get_error_obj(QEMUFile *f, Error **errp); void qemu_file_set_error(QEMUFile *f, int ret); int qemu_file_shutdown(QEMUFile *f); QEMUFile *qemu_file_get_return_path(QEMUFile *f); diff --git a/migration/ram.c b/migration/ram.c index 34724e8fe8..d05ffddbc8 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1898,7 +1898,8 @@ static void migration_page_queue_free(RAMState *rs) * @start: starting address from the start of the RAMBlock * @len: length (in bytes) to send */ -int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) +int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len, + Error **errp) { RAMBlock *ramblock; RAMState *rs = ram_state; @@ -1915,7 +1916,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) * Shouldn't happen, we can't reuse the last RAMBlock if * it's the 1st request. */ - error_report("ram_save_queue_pages no previous block"); + error_setg(errp, "MIG_RP_MSG_REQ_PAGES has no previous block"); return -1; } } else { @@ -1923,16 +1924,17 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) if (!ramblock) { /* We shouldn't be asked for a non-existent RAMBlock */ - error_report("ram_save_queue_pages no block '%s'", rbname); + error_setg(errp, "MIG_RP_MSG_REQ_PAGES has no block '%s'", rbname); return -1; } rs->last_req_rb = ramblock; } trace_ram_save_queue_pages(ramblock->idstr, start, len); if (!offset_in_ramblock(ramblock, start + len - 1)) { - error_report("%s request overrun start=" RAM_ADDR_FMT " len=" - RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, - __func__, start, len, ramblock->used_length); + error_setg(errp, "MIG_RP_MSG_REQ_PAGES request overrun, " + "start=" RAM_ADDR_FMT " len=" + RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, + start, len, ramblock->used_length); return -1; } @@ -1964,9 +1966,9 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) assert(len % page_size == 0); while (len) { if (ram_save_host_page_urgent(pss)) { - error_report("%s: ram_save_host_page_urgent() failed: " - "ramblock=%s, start_addr=0x"RAM_ADDR_FMT, - __func__, ramblock->idstr, start); + error_setg(errp, "ram_save_host_page_urgent() failed: " + "ramblock=%s, start_addr=0x"RAM_ADDR_FMT, + ramblock->idstr, start); ret = -1; break; } @@ -4110,7 +4112,7 @@ static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs) * This is only used when the postcopy migration is paused but wants * to resume from a middle point. */ -int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) +int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block, Error **errp) { int ret = -EINVAL; /* from_dst_file is always valid because we're within rp_thread */ @@ -4124,8 +4126,8 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) trace_ram_dirty_bitmap_reload_begin(block->idstr); if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) { - error_report("%s: incorrect state %s", __func__, - MigrationStatus_str(s->state)); + error_setg(errp, "Reload bitmap in incorrect state %s", + MigrationStatus_str(s->state)); return -EINVAL; } @@ -4142,9 +4144,8 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) /* The size of the bitmap should match with our ramblock */ if (size != local_size) { - error_report("%s: ramblock '%s' bitmap size mismatch " - "(0x%"PRIx64" != 0x%"PRIx64")", __func__, - block->idstr, size, local_size); + error_setg(errp, "ramblock '%s' bitmap size mismatch (0x%"PRIx64 + " != 0x%"PRIx64")", block->idstr, size, local_size); return -EINVAL; } @@ -4153,15 +4154,15 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) ret = qemu_file_get_error(file); if (ret || size != local_size) { - error_report("%s: read bitmap failed for ramblock '%s': %d" - " (size 0x%"PRIx64", got: 0x%"PRIx64")", - __func__, block->idstr, ret, local_size, size); + error_setg(errp, "read bitmap failed for ramblock '%s': %d" + " (size 0x%"PRIx64", got: 0x%"PRIx64")", + block->idstr, ret, local_size, size); return -EIO; } if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) { - error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64, - __func__, block->idstr, end_mark); + error_setg(errp, "ramblock '%s' end mark incorrect: 0x%"PRIx64, + block->idstr, end_mark); return -EINVAL; } diff --git a/migration/ram.h b/migration/ram.h index 9f3ad1ee81..6e2c2c1950 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -50,7 +50,8 @@ uint64_t ram_bytes_total(void); void mig_throttle_counter_reset(void); uint64_t ram_pagesize_summary(void); -int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len); +int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len, + Error **errp); void ram_postcopy_migrated_memory_release(MigrationState *ms); /* For outgoing discard bitmap */ void ram_postcopy_send_discard_bitmap(MigrationState *ms); @@ -70,7 +71,7 @@ void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr); int64_t ramblock_recv_bitmap_send(QEMUFile *file, const char *block_name); -int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb); +int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp); bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start); void postcopy_preempt_shutdown_file(MigrationState *s); void *postcopy_preempt_thread(void *opaque); diff --git a/migration/trace-events b/migration/trace-events index e54f317e3b..de4a743c8a 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -150,8 +150,6 @@ multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p" multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname) "ioc=%p ioctype=%s hostname=%s" # migration.c -await_return_path_close_on_source_close(void) "" -await_return_path_close_on_source_joining(void) "" migrate_set_state(const char *new_state) "new state %s" migrate_fd_cleanup(void) "" migrate_fd_error(const char *error_desc) "error=%s" @@ -168,7 +166,7 @@ migration_completion_postcopy_end_after_complete(void) "" migration_rate_limit_pre(int ms) "%d ms" migration_rate_limit_post(int urgent) "urgent: %d" migration_return_path_end_before(void) "" -migration_return_path_end_after(int rp_error) "%d" +migration_return_path_end_after(void) "" migration_thread_after_loop(void) "" migration_thread_file_err(void) "" migration_thread_setup_complete(void) "" From f8c543e808f20ba936d94cfa5fc592627d8d100c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 17 Oct 2023 16:26:30 -0400 Subject: [PATCH 27/40] migration: Allow network to fail even during recovery Normally the postcopy recover phase should only exist for a super short period, that's the duration when QEMU is trying to recover from an interrupted postcopy migration, during which handshake will be carried out for continuing the procedure with state changes from PAUSED -> RECOVER -> POSTCOPY_ACTIVE again. Here RECOVER phase should be super small, that happens right after the admin specified a new but working network link for QEMU to reconnect to dest QEMU. However there can still be case where the channel is broken in this small RECOVER window. If it happens, with current code there's no way the src QEMU can got kicked out of RECOVER stage. No way either to retry the recover in another channel when established. This patch allows the RECOVER phase to fail itself too - we're mostly ready, just some small things missing, e.g. properly kick the main migration thread out when sleeping on rp_sem when we found that we're at RECOVER stage. When this happens, it fails the RECOVER itself, and rollback to PAUSED stage. Then the user can retry another round of recovery. To make it even stronger, teach QMP command migrate-pause to explicitly kick src/dst QEMU out when needed, so even if for some reason the migration thread didn't got kicked out already by a failing rethrn-path thread, the admin can also kick it out. This will be an super, super corner case, but still try to cover that. One can try to test this with two proxy channels for migration: (a) socat unix-listen:/tmp/src.sock,reuseaddr,fork tcp:localhost:10000 (b) socat tcp-listen:10000,reuseaddr,fork unix:/tmp/dst.sock So the migration channel will be: (a) (b) src -> /tmp/src.sock -> tcp:10000 -> /tmp/dst.sock -> dst Then to make QEMU hang at RECOVER stage, one can do below: (1) stop the postcopy using QMP command postcopy-pause (2) kill the 2nd proxy (b) (3) try to recover the postcopy using /tmp/src.sock on src (4) src QEMU will go into RECOVER stage but won't be able to continue from there, because the channel is actually broken at (b) Before this patch, step (4) will make src QEMU stuck in RECOVER stage, without a way to kick the QEMU out or continue the postcopy again. After this patch, (4) will quickly fail qemu and bounce back to PAUSED stage. Admin can also kick QEMU from (4) into PAUSED when needed using migrate-pause when needed. After bouncing back to PAUSED stage, one can recover again. Reported-by: Xiaohui Li Reviewed-by: Fabiano Rosas Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111332 Reviewed-by: Juan Quintela Signed-off-by: Peter Xu Signed-off-by: Juan Quintela Message-ID: <20231017202633.296756-3-peterx@redhat.com> --- migration/migration.c | 63 +++++++++++++++++++++++++++++++++++++++---- migration/migration.h | 8 ++++-- migration/ram.c | 4 ++- 3 files changed, 67 insertions(+), 8 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 455ddc896a..e875ea0d6b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1393,6 +1393,17 @@ bool migration_in_postcopy(void) } } +bool migration_postcopy_is_alive(int state) +{ + switch (state) { + case MIGRATION_STATUS_POSTCOPY_ACTIVE: + case MIGRATION_STATUS_POSTCOPY_RECOVER: + return true; + default: + return false; + } +} + bool migration_in_postcopy_after_devices(MigrationState *s) { return migration_in_postcopy() && s->postcopy_after_devices; @@ -1673,8 +1684,15 @@ void qmp_migrate_pause(Error **errp) MigrationIncomingState *mis = migration_incoming_get_current(); int ret = 0; - if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { + if (migration_postcopy_is_alive(ms->state)) { /* Source side, during postcopy */ + Error *error = NULL; + + /* Tell the core migration that we're pausing */ + error_setg(&error, "Postcopy migration is paused by the user"); + migrate_set_error(ms, error); + error_free(error); + qemu_mutex_lock(&ms->qemu_file_lock); if (ms->to_dst_file) { ret = qemu_file_shutdown(ms->to_dst_file); @@ -1683,10 +1701,17 @@ void qmp_migrate_pause(Error **errp) if (ret) { error_setg(errp, "Failed to pause source migration"); } + + /* + * Kick the migration thread out of any waiting windows (on behalf + * of the rp thread). + */ + migration_rp_kick(ms); + return; } - if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { + if (migration_postcopy_is_alive(mis->state)) { ret = qemu_file_shutdown(mis->from_src_file); if (ret) { error_setg(errp, "Failed to pause destination migration"); @@ -1695,7 +1720,7 @@ void qmp_migrate_pause(Error **errp) } error_setg(errp, "migrate-pause is currently only supported " - "during postcopy-active state"); + "during postcopy-active or postcopy-recover state"); } bool migration_is_blocked(Error **errp) @@ -1882,9 +1907,21 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) qemu_sem_post(&s->pause_sem); } -void migration_rp_wait(MigrationState *s) +int migration_rp_wait(MigrationState *s) { + /* If migration has failure already, ignore the wait */ + if (migrate_has_error(s)) { + return -1; + } + qemu_sem_wait(&s->rp_state.rp_sem); + + /* After wait, double check that there's no failure */ + if (migrate_has_error(s)) { + return -1; + } + + return 0; } void migration_rp_kick(MigrationState *s) @@ -2146,6 +2183,20 @@ out: trace_source_return_path_thread_bad_end(); } + if (ms->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { + /* + * this will be extremely unlikely: that we got yet another network + * issue during recovering of the 1st network failure.. during this + * period the main migration thread can be waiting on rp_sem for + * this thread to sync with the other side. + * + * When this happens, explicitly kick the migration thread out of + * RECOVER stage and back to PAUSED, so the admin can try + * everything again. + */ + migration_rp_kick(ms); + } + trace_source_return_path_thread_end(); rcu_unregister_thread(); @@ -2611,7 +2662,9 @@ static int postcopy_resume_handshake(MigrationState *s) qemu_savevm_send_postcopy_resume(s->to_dst_file); while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { - migration_rp_wait(s); + if (migration_rp_wait(s)) { + return -1; + } } if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { diff --git a/migration/migration.h b/migration/migration.h index 615b517594..af8c965b7f 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -494,6 +494,7 @@ int migrate_init(MigrationState *s, Error **errp); bool migration_is_blocked(Error **errp); /* True if outgoing migration has entered postcopy phase */ bool migration_in_postcopy(void); +bool migration_postcopy_is_alive(int state); MigrationState *migrate_get_current(void); uint64_t ram_get_total_transferred_pages(void); @@ -534,8 +535,11 @@ void migration_populate_vfio_info(MigrationInfo *info); void migration_reset_vfio_bytes_transferred(void); void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); -/* Migration thread waiting for return path thread. */ -void migration_rp_wait(MigrationState *s); +/* + * Migration thread waiting for return path thread. Return non-zero if an + * error is detected. + */ +int migration_rp_wait(MigrationState *s); /* * Kick the migration thread waiting for return path messages. NOTE: the * name can be slightly confusing (when read as "kick the rp thread"), just diff --git a/migration/ram.c b/migration/ram.c index d05ffddbc8..929cba08f4 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4099,7 +4099,9 @@ static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs) /* Wait until all the ramblocks' dirty bitmap synced */ while (qatomic_read(&rs->postcopy_bmap_sync_requested)) { - migration_rp_wait(s); + if (migration_rp_wait(s)) { + return -1; + } } trace_ram_dirty_bitmap_sync_complete(); From 7bca2bb7bfd2fe5241696fd09cc97f23661d4fc8 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 17 Oct 2023 16:26:31 -0400 Subject: [PATCH 28/40] tests/migration-test: Add a test for postcopy hangs during RECOVER To do so, create two paired sockets, but make them not providing real data. Feed those fake sockets to src/dst QEMUs for recovery to let them go into RECOVER stage without going out. Test that we can always kick it out and recover again with the right ports. This patch is based on Fabiano's version here: https://lore.kernel.org/r/877cowmdu0.fsf@suse.de Signed-off-by: Fabiano Rosas [peterx: write commit message, remove case 1, fix bugs, and more] Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231017202633.296756-4-peterx@redhat.com> --- tests/qtest/migration-test.c | 110 +++++++++++++++++++++++++++++++++-- 1 file changed, 104 insertions(+), 6 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index b7ebc23903..047b7194df 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -728,6 +728,7 @@ typedef struct { /* Postcopy specific fields */ void *postcopy_data; bool postcopy_preempt; + bool postcopy_recovery_test_fail; } MigrateCommon; static int test_migrate_start(QTestState **from, QTestState **to, @@ -1404,6 +1405,80 @@ static void test_postcopy_preempt_tls_psk(void) } #endif +static void wait_for_postcopy_status(QTestState *one, const char *status) +{ + wait_for_migration_status(one, status, + (const char * []) { "failed", "active", + "completed", NULL }); +} + +#ifndef _WIN32 +static void postcopy_recover_fail(QTestState *from, QTestState *to) +{ + int ret, pair1[2], pair2[2]; + char c; + + /* Create two unrelated socketpairs */ + ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair1); + g_assert_cmpint(ret, ==, 0); + + ret = qemu_socketpair(PF_LOCAL, SOCK_STREAM, 0, pair2); + g_assert_cmpint(ret, ==, 0); + + /* + * Give the guests unpaired ends of the sockets, so they'll all blocked + * at reading. This mimics a wrong channel established. + */ + qtest_qmp_fds_assert_success(from, &pair1[0], 1, + "{ 'execute': 'getfd'," + " 'arguments': { 'fdname': 'fd-mig' }}"); + qtest_qmp_fds_assert_success(to, &pair2[0], 1, + "{ 'execute': 'getfd'," + " 'arguments': { 'fdname': 'fd-mig' }}"); + + /* + * Write the 1st byte as QEMU_VM_COMMAND (0x8) for the dest socket, to + * emulate the 1st byte of a real recovery, but stops from there to + * keep dest QEMU in RECOVER. This is needed so that we can kick off + * the recover process on dest QEMU (by triggering the G_IO_IN event). + * + * NOTE: this trick is not needed on src QEMUs, because src doesn't + * rely on an pre-existing G_IO_IN event, so it will always trigger the + * upcoming recovery anyway even if it can read nothing. + */ +#define QEMU_VM_COMMAND 0x08 + c = QEMU_VM_COMMAND; + ret = send(pair2[1], &c, 1, 0); + g_assert_cmpint(ret, ==, 1); + + migrate_recover(to, "fd:fd-mig"); + migrate_qmp(from, "fd:fd-mig", "{'resume': true}"); + + /* + * Make sure both QEMU instances will go into RECOVER stage, then test + * kicking them out using migrate-pause. + */ + wait_for_postcopy_status(from, "postcopy-recover"); + wait_for_postcopy_status(to, "postcopy-recover"); + + /* + * This would be issued by the admin upon noticing the hang, we should + * make sure we're able to kick this out. + */ + migrate_pause(from); + wait_for_postcopy_status(from, "postcopy-paused"); + + /* Do the same test on dest */ + migrate_pause(to); + wait_for_postcopy_status(to, "postcopy-paused"); + + close(pair1[0]); + close(pair1[1]); + close(pair2[0]); + close(pair2[1]); +} +#endif /* _WIN32 */ + static void test_postcopy_recovery_common(MigrateCommon *args) { QTestState *from, *to; @@ -1439,9 +1514,19 @@ static void test_postcopy_recovery_common(MigrateCommon *args) * migrate-recover command can only succeed if destination machine * is in the paused state */ - wait_for_migration_status(to, "postcopy-paused", - (const char * []) { "failed", "active", - "completed", NULL }); + wait_for_postcopy_status(to, "postcopy-paused"); + wait_for_postcopy_status(from, "postcopy-paused"); + +#ifndef _WIN32 + if (args->postcopy_recovery_test_fail) { + /* + * Test when a wrong socket specified for recover, and then the + * ability to kick it out, and continue with a correct socket. + */ + postcopy_recover_fail(from, to); + /* continue with a good recovery */ + } +#endif /* _WIN32 */ /* * Create a new socket to emulate a new channel that is different @@ -1455,9 +1540,6 @@ static void test_postcopy_recovery_common(MigrateCommon *args) * Try to rebuild the migration channel using the resume flag and * the newly created channel */ - wait_for_migration_status(from, "postcopy-paused", - (const char * []) { "failed", "active", - "completed", NULL }); migrate_qmp(from, uri, "{'resume': true}"); /* Restore the postcopy bandwidth to unlimited */ @@ -1482,6 +1564,17 @@ static void test_postcopy_recovery_compress(void) test_postcopy_recovery_common(&args); } +#ifndef _WIN32 +static void test_postcopy_recovery_double_fail(void) +{ + MigrateCommon args = { + .postcopy_recovery_test_fail = true, + }; + + test_postcopy_recovery_common(&args); +} +#endif /* _WIN32 */ + #ifdef CONFIG_GNUTLS static void test_postcopy_recovery_tls_psk(void) { @@ -3093,6 +3186,11 @@ int main(int argc, char **argv) qtest_add_func("/migration/postcopy/recovery/compress/plain", test_postcopy_recovery_compress); } +#ifndef _WIN32 + qtest_add_func("/migration/postcopy/recovery/double-failures", + test_postcopy_recovery_double_fail); +#endif /* _WIN32 */ + } qtest_add_func("/migration/bad_dest", test_baddest); From 88577f32421cd6f52928b0c69573983d8d9acbd0 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 17 Oct 2023 16:26:32 -0400 Subject: [PATCH 29/40] migration: Change ram_dirty_bitmap_reload() retval to bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we have a Error** passed into the return path thread stack, which is even clearer than an int retval. Change ram_dirty_bitmap_reload() and the callers to use a bool instead to replace errnos. Suggested-by: Philippe Mathieu-Daudé Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231017202633.296756-5-peterx@redhat.com> --- migration/migration.c | 18 +++++++++--------- migration/ram.c | 24 ++++++++++++------------ migration/ram.h | 2 +- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index e875ea0d6b..ce9b7692ad 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1971,29 +1971,29 @@ migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, ram_save_queue_pages(rbname, start, len, errp); } -static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name, - Error **errp) +static bool migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name, + Error **errp) { RAMBlock *block = qemu_ram_block_by_name(block_name); if (!block) { error_setg(errp, "MIG_RP_MSG_RECV_BITMAP has invalid block name '%s'", block_name); - return -EINVAL; + return false; } /* Fetch the received bitmap and refresh the dirty bitmap */ return ram_dirty_bitmap_reload(s, block, errp); } -static int migrate_handle_rp_resume_ack(MigrationState *s, - uint32_t value, Error **errp) +static bool migrate_handle_rp_resume_ack(MigrationState *s, + uint32_t value, Error **errp) { trace_source_return_path_thread_resume_ack(value); if (value != MIGRATION_RESUME_ACK_VALUE) { error_setg(errp, "illegal resume_ack value %"PRIu32, value); - return -1; + return false; } /* Now both sides are active. */ @@ -2003,7 +2003,7 @@ static int migrate_handle_rp_resume_ack(MigrationState *s, /* Notify send thread that time to continue send pages */ migration_rp_kick(s); - return 0; + return true; } /* @@ -2154,14 +2154,14 @@ static void *source_return_path_thread(void *opaque) } /* Format: len (1B) + idstr (<255B). This ends the idstr. */ buf[buf[0] + 1] = '\0'; - if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) { + if (!migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) { goto out; } break; case MIG_RP_MSG_RESUME_ACK: tmp32 = ldl_be_p(buf); - if (migrate_handle_rp_resume_ack(ms, tmp32, &err)) { + if (!migrate_handle_rp_resume_ack(ms, tmp32, &err)) { goto out; } break; diff --git a/migration/ram.c b/migration/ram.c index 929cba08f4..a0f3b86663 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4113,10 +4113,11 @@ static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs) * Read the received bitmap, revert it as the initial dirty bitmap. * This is only used when the postcopy migration is paused but wants * to resume from a middle point. + * + * Returns true if succeeded, false for errors. */ -int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block, Error **errp) +bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block, Error **errp) { - int ret = -EINVAL; /* from_dst_file is always valid because we're within rp_thread */ QEMUFile *file = s->rp_state.from_dst_file; g_autofree unsigned long *le_bitmap = NULL; @@ -4130,7 +4131,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block, Error **errp) if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) { error_setg(errp, "Reload bitmap in incorrect state %s", MigrationStatus_str(s->state)); - return -EINVAL; + return false; } /* @@ -4148,24 +4149,23 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block, Error **errp) if (size != local_size) { error_setg(errp, "ramblock '%s' bitmap size mismatch (0x%"PRIx64 " != 0x%"PRIx64")", block->idstr, size, local_size); - return -EINVAL; + return false; } size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size); end_mark = qemu_get_be64(file); - ret = qemu_file_get_error(file); - if (ret || size != local_size) { - error_setg(errp, "read bitmap failed for ramblock '%s': %d" - " (size 0x%"PRIx64", got: 0x%"PRIx64")", - block->idstr, ret, local_size, size); - return -EIO; + if (qemu_file_get_error(file) || size != local_size) { + error_setg(errp, "read bitmap failed for ramblock '%s': " + "(size 0x%"PRIx64", got: 0x%"PRIx64")", + block->idstr, local_size, size); + return false; } if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) { error_setg(errp, "ramblock '%s' end mark incorrect: 0x%"PRIx64, block->idstr, end_mark); - return -EINVAL; + return false; } /* @@ -4197,7 +4197,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block, Error **errp) */ migration_rp_kick(s); - return 0; + return true; } static int ram_resume_prepare(MigrationState *s, void *opaque) diff --git a/migration/ram.h b/migration/ram.h index 6e2c2c1950..9b937a446b 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -71,7 +71,7 @@ void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr); int64_t ramblock_recv_bitmap_send(QEMUFile *file, const char *block_name); -int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp); +bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp); bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start); void postcopy_preempt_shutdown_file(MigrationState *s); void *postcopy_preempt_thread(void *opaque); From e034f8836492d6a4c64a6a0079877fc3c7e09f36 Mon Sep 17 00:00:00 2001 From: Het Gala Date: Mon, 23 Oct 2023 15:20:40 -0300 Subject: [PATCH 30/40] migration: New QAPI type 'MigrateAddress' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces well defined MigrateAddress struct and its related child objects. The existing argument of 'migrate' and 'migrate-incoming' QAPI - 'uri' is of type string. The current implementation follows double encoding scheme for fetching migration parameters like 'uri' and this is not an ideal design. Motive for intoducing struct level design is to prevent double encoding of QAPI arguments, as Qemu should be able to directly use the QAPI arguments without any level of encoding. Note: this commit only adds the type, and actual uses comes in later commits. Fabiano fixed for "file" transport. Suggested-by: Aravind Retnakaran Signed-off-by: Het Gala Reviewed-by: Juan Quintela Reviewed-by: Daniel P. Berrangé Acked-by: Markus Armbruster Signed-off-by: Fabiano Rosas Signed-off-by: Juan Quintela Message-ID: <20231023182053.8711-2-farosas@suse.de> Message-Id: <20231023182053.8711-3-farosas@suse.de> --- qapi/migration.json | 57 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/qapi/migration.json b/qapi/migration.json index 3daeffc95d..182b150b87 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1584,6 +1584,63 @@ ## { 'command': 'migrate-continue', 'data': {'state': 'MigrationStatus'} } +## +# @MigrationAddressType: +# +# The migration stream transport mechanisms. +# +# @socket: Migrate via socket. +# +# @exec: Direct the migration stream to another process. +# +# @rdma: Migrate via RDMA. +# +# @file: Direct the migration stream to a file. +# +# Since 8.2 +## +{ 'enum': 'MigrationAddressType', + 'data': [ 'socket', 'exec', 'rdma', 'file' ] } + +## +# @FileMigrationArgs: +# +# @filename: The file to receive the migration stream +# +# @offset: The file offset where the migration stream will start +# +# Since 8.2 +## +{ 'struct': 'FileMigrationArgs', + 'data': { 'filename': 'str', + 'offset': 'uint64' } } + +## +# @MigrationExecCommand: +# +# @args: command (list head) and arguments to execute. +# +# Since 8.2 +## +{ 'struct': 'MigrationExecCommand', + 'data': {'args': [ 'str' ] } } + +## +# @MigrationAddress: +# +# Migration endpoint configuration. +# +# Since 8.2 +## +{ 'union': 'MigrationAddress', + 'base': { 'transport' : 'MigrationAddressType'}, + 'discriminator': 'transport', + 'data': { + 'socket': 'SocketAddress', + 'exec': 'MigrationExecCommand', + 'rdma': 'InetSocketAddress', + 'file': 'FileMigrationArgs' } } + ## # @migrate: # From 72a8192e225ceafd8494150eb9375c400da50dd6 Mon Sep 17 00:00:00 2001 From: Het Gala Date: Mon, 23 Oct 2023 15:20:42 -0300 Subject: [PATCH 31/40] migration: convert migration 'uri' into 'MigrateAddress' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch parses 'migrate' and 'migrate-incoming' QAPI's 'uri' string containing migration connection related information and stores them inside well defined 'MigrateAddress' struct. Fabiano fixed for "file" transport. Suggested-by: Aravind Retnakaran Signed-off-by: Het Gala Signed-off-by: Fabiano Rosas Reviewed-by: Daniel P. Berrangé Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231023182053.8711-4-farosas@suse.de> Message-ID: <20231023182053.8711-5-farosas@suse.de> --- migration/exec.c | 1 - migration/exec.h | 4 +++ migration/file.c | 2 +- migration/file.h | 1 + migration/migration.c | 63 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 2 deletions(-) diff --git a/migration/exec.c b/migration/exec.c index 2bf882bbe1..32f5143dfd 100644 --- a/migration/exec.c +++ b/migration/exec.c @@ -27,7 +27,6 @@ #include "qemu/cutils.h" #ifdef WIN32 -const char *exec_get_cmd_path(void); const char *exec_get_cmd_path(void) { g_autofree char *detected_path = g_new(char, MAX_PATH); diff --git a/migration/exec.h b/migration/exec.h index b210ffde7a..736cd71028 100644 --- a/migration/exec.h +++ b/migration/exec.h @@ -19,6 +19,10 @@ #ifndef QEMU_MIGRATION_EXEC_H #define QEMU_MIGRATION_EXEC_H + +#ifdef WIN32 +const char *exec_get_cmd_path(void); +#endif void exec_start_incoming_migration(const char *host_port, Error **errp); void exec_start_outgoing_migration(MigrationState *s, const char *host_port, diff --git a/migration/file.c b/migration/file.c index cf5b1bf365..ec069ef329 100644 --- a/migration/file.c +++ b/migration/file.c @@ -19,7 +19,7 @@ /* Remove the offset option from @filespec and return it in @offsetp. */ -static int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp) +int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp) { char *option = strstr(filespec, OFFSET_OPTION); int ret; diff --git a/migration/file.h b/migration/file.h index 90fa4849e0..3888a57105 100644 --- a/migration/file.h +++ b/migration/file.h @@ -11,4 +11,5 @@ void file_start_incoming_migration(const char *filename, Error **errp); void file_start_outgoing_migration(MigrationState *s, const char *filename, Error **errp); +int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp); #endif diff --git a/migration/migration.c b/migration/migration.c index ce9b7692ad..a1bace259e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -66,6 +66,7 @@ #include "sysemu/qtest.h" #include "options.h" #include "sysemu/dirtylimit.h" +#include "qemu/sockets.h" static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); @@ -453,9 +454,62 @@ void migrate_add_address(SocketAddress *address) QAPI_CLONE(SocketAddress, address)); } +static bool migrate_uri_parse(const char *uri, + MigrationAddress **channel, + Error **errp) +{ + g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1); + SocketAddress *saddr = NULL; + InetSocketAddress *isock = &addr->u.rdma; + strList **tail = &addr->u.exec.args; + + if (strstart(uri, "exec:", NULL)) { + addr->transport = MIGRATION_ADDRESS_TYPE_EXEC; +#ifdef WIN32 + QAPI_LIST_APPEND(tail, g_strdup(exec_get_cmd_path())); + QAPI_LIST_APPEND(tail, g_strdup("/c")); +#else + QAPI_LIST_APPEND(tail, g_strdup("/bin/sh")); + QAPI_LIST_APPEND(tail, g_strdup("-c")); +#endif + QAPI_LIST_APPEND(tail, g_strdup(uri + strlen("exec:"))); + } else if (strstart(uri, "rdma:", NULL)) { + if (inet_parse(isock, uri + strlen("rdma:"), errp)) { + qapi_free_InetSocketAddress(isock); + return false; + } + addr->transport = MIGRATION_ADDRESS_TYPE_RDMA; + } else if (strstart(uri, "tcp:", NULL) || + strstart(uri, "unix:", NULL) || + strstart(uri, "vsock:", NULL) || + strstart(uri, "fd:", NULL)) { + addr->transport = MIGRATION_ADDRESS_TYPE_SOCKET; + saddr = socket_parse(uri, errp); + if (!saddr) { + return false; + } + addr->u.socket.type = saddr->type; + addr->u.socket.u = saddr->u; + } else if (strstart(uri, "file:", NULL)) { + addr->transport = MIGRATION_ADDRESS_TYPE_FILE; + addr->u.file.filename = g_strdup(uri + strlen("file:")); + if (file_parse_offset(addr->u.file.filename, &addr->u.file.offset, + errp)) { + return false; + } + } else { + error_setg(errp, "unknown migration protocol: %s", uri); + return false; + } + + *channel = g_steal_pointer(&addr); + return true; +} + static void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p = NULL; + g_autoptr(MigrationAddress) channel = NULL; MigrationIncomingState *mis = migration_incoming_get_current(); /* URI is not suitable for migration? */ @@ -463,6 +517,10 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) return; } + if (uri && !migrate_uri_parse(uri, &channel, errp)) { + return; + } + migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); @@ -1840,12 +1898,17 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, Error *local_err = NULL; MigrationState *s = migrate_get_current(); const char *p = NULL; + g_autoptr(MigrationAddress) channel = NULL; /* URI is not suitable for migration? */ if (!migration_channels_and_uri_compatible(uri, errp)) { return; } + if (!migrate_uri_parse(uri, &channel, errp)) { + return; + } + resume_requested = has_resume && resume; if (!migrate_prepare(s, has_blk && blk, has_inc && inc, resume_requested, errp)) { From 34dfc5e4077039e93667b5a72a2fbbe2f1b96e4b Mon Sep 17 00:00:00 2001 From: Het Gala Date: Mon, 23 Oct 2023 15:20:44 -0300 Subject: [PATCH 32/40] migration: convert socket backend to accept MigrateAddress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Socket transport backend for 'migrate'/'migrate-incoming' QAPIs accept new wire protocol of MigrateAddress struct. It is achived by parsing 'uri' string and storing migration parameters required for socket connection into well defined SocketAddress struct. Suggested-by: Aravind Retnakaran Signed-off-by: Het Gala Reviewed-by: Daniel P. Berrangé Signed-off-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231023182053.8711-6-farosas@suse.de> --- migration/migration.c | 30 ++++++++++++++++++------------ migration/socket.c | 39 +++++++++------------------------------ migration/socket.h | 7 ++++--- 3 files changed, 31 insertions(+), 45 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index a1bace259e..846877135a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -524,10 +524,15 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); - if (strstart(uri, "tcp:", &p) || - strstart(uri, "unix:", NULL) || - strstart(uri, "vsock:", NULL)) { - socket_start_incoming_migration(p ? p : uri, errp); + if (channel->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { + SocketAddress *saddr = &channel->u.socket; + if (saddr->type == SOCKET_ADDRESS_TYPE_INET || + saddr->type == SOCKET_ADDRESS_TYPE_UNIX || + saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) { + socket_start_incoming_migration(saddr, errp); + } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) { + fd_start_incoming_migration(saddr->u.fd.str, errp); + } #ifdef CONFIG_RDMA } else if (strstart(uri, "rdma:", &p)) { if (migrate_compress()) { @@ -546,8 +551,6 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) #endif } else if (strstart(uri, "exec:", &p)) { exec_start_incoming_migration(p, errp); - } else if (strstart(uri, "fd:", &p)) { - fd_start_incoming_migration(p, errp); } else if (strstart(uri, "file:", &p)) { file_start_incoming_migration(p, errp); } else { @@ -1922,18 +1925,21 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, } } - if (strstart(uri, "tcp:", &p) || - strstart(uri, "unix:", NULL) || - strstart(uri, "vsock:", NULL)) { - socket_start_outgoing_migration(s, p ? p : uri, &local_err); + if (channel->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { + SocketAddress *saddr = &channel->u.socket; + if (saddr->type == SOCKET_ADDRESS_TYPE_INET || + saddr->type == SOCKET_ADDRESS_TYPE_UNIX || + saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) { + socket_start_outgoing_migration(s, saddr, &local_err); + } else if (saddr->type == SOCKET_ADDRESS_TYPE_FD) { + fd_start_outgoing_migration(s, saddr->u.fd.str, &local_err); + } #ifdef CONFIG_RDMA } else if (strstart(uri, "rdma:", &p)) { rdma_start_outgoing_migration(s, p, &local_err); #endif } else if (strstart(uri, "exec:", &p)) { exec_start_outgoing_migration(s, p, &local_err); - } else if (strstart(uri, "fd:", &p)) { - fd_start_outgoing_migration(s, p, &local_err); } else if (strstart(uri, "file:", &p)) { file_start_outgoing_migration(s, p, &local_err); } else { diff --git a/migration/socket.c b/migration/socket.c index 1b6f5baefb..98e3ea1514 100644 --- a/migration/socket.c +++ b/migration/socket.c @@ -28,6 +28,8 @@ #include "trace.h" #include "postcopy-ram.h" #include "options.h" +#include "qapi/clone-visitor.h" +#include "qapi/qapi-visit-sockets.h" struct SocketOutgoingArgs { SocketAddress *saddr; @@ -108,19 +110,19 @@ out: object_unref(OBJECT(sioc)); } -static void -socket_start_outgoing_migration_internal(MigrationState *s, - SocketAddress *saddr, - Error **errp) +void socket_start_outgoing_migration(MigrationState *s, + SocketAddress *saddr, + Error **errp) { QIOChannelSocket *sioc = qio_channel_socket_new(); struct SocketConnectData *data = g_new0(struct SocketConnectData, 1); + SocketAddress *addr = QAPI_CLONE(SocketAddress, saddr); data->s = s; /* in case previous migration leaked it */ qapi_free_SocketAddress(outgoing_args.saddr); - outgoing_args.saddr = saddr; + outgoing_args.saddr = addr; if (saddr->type == SOCKET_ADDRESS_TYPE_INET) { data->hostname = g_strdup(saddr->u.inet.host); @@ -135,18 +137,6 @@ socket_start_outgoing_migration_internal(MigrationState *s, NULL); } -void socket_start_outgoing_migration(MigrationState *s, - const char *str, - Error **errp) -{ - Error *err = NULL; - SocketAddress *saddr = socket_parse(str, &err); - if (!err) { - socket_start_outgoing_migration_internal(s, saddr, &err); - } - error_propagate(errp, err); -} - static void socket_accept_incoming_migration(QIONetListener *listener, QIOChannelSocket *cioc, gpointer opaque) @@ -172,9 +162,8 @@ socket_incoming_migration_end(void *opaque) object_unref(OBJECT(listener)); } -static void -socket_start_incoming_migration_internal(SocketAddress *saddr, - Error **errp) +void socket_start_incoming_migration(SocketAddress *saddr, + Error **errp) { QIONetListener *listener = qio_net_listener_new(); MigrationIncomingState *mis = migration_incoming_get_current(); @@ -213,13 +202,3 @@ socket_start_incoming_migration_internal(SocketAddress *saddr, } } -void socket_start_incoming_migration(const char *str, Error **errp) -{ - Error *err = NULL; - SocketAddress *saddr = socket_parse(str, &err); - if (!err) { - socket_start_incoming_migration_internal(saddr, &err); - } - qapi_free_SocketAddress(saddr); - error_propagate(errp, err); -} diff --git a/migration/socket.h b/migration/socket.h index dc54df4e6c..5e4c33b8ea 100644 --- a/migration/socket.h +++ b/migration/socket.h @@ -19,13 +19,14 @@ #include "io/channel.h" #include "io/task.h" +#include "qemu/sockets.h" void socket_send_channel_create(QIOTaskFunc f, void *data); QIOChannel *socket_send_channel_create_sync(Error **errp); int socket_send_channel_destroy(QIOChannel *send); -void socket_start_incoming_migration(const char *str, Error **errp); +void socket_start_incoming_migration(SocketAddress *saddr, Error **errp); -void socket_start_outgoing_migration(MigrationState *s, const char *str, - Error **errp); +void socket_start_outgoing_migration(MigrationState *s, + SocketAddress *saddr, Error **errp); #endif From 3fa9642ff7d51f7fc3ba68e6ccd13a939d5bd609 Mon Sep 17 00:00:00 2001 From: Het Gala Date: Mon, 23 Oct 2023 15:20:45 -0300 Subject: [PATCH 33/40] migration: convert rdma backend to accept MigrateAddress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RDMA based transport backend for 'migrate'/'migrate-incoming' QAPIs accept new wire protocol of MigrateAddress struct. It is achived by parsing 'uri' string and storing migration parameters required for RDMA connection into well defined InetSocketAddress struct. Suggested-by: Aravind Retnakaran Signed-off-by: Het Gala Reviewed-by: Daniel P. Berrangé Signed-off-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231023182053.8711-7-farosas@suse.de> --- migration/migration.c | 8 ++++---- migration/rdma.c | 33 +++++++++++---------------------- migration/rdma.h | 6 ++++-- 3 files changed, 19 insertions(+), 28 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 846877135a..8c26672630 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -534,7 +534,7 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) fd_start_incoming_migration(saddr->u.fd.str, errp); } #ifdef CONFIG_RDMA - } else if (strstart(uri, "rdma:", &p)) { + } else if (channel->transport == MIGRATION_ADDRESS_TYPE_RDMA) { if (migrate_compress()) { error_setg(errp, "RDMA and compression can't be used together"); return; @@ -547,7 +547,7 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) error_setg(errp, "RDMA and multifd can't be used together"); return; } - rdma_start_incoming_migration(p, errp); + rdma_start_incoming_migration(&channel->u.rdma, errp); #endif } else if (strstart(uri, "exec:", &p)) { exec_start_incoming_migration(p, errp); @@ -1935,8 +1935,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, fd_start_outgoing_migration(s, saddr->u.fd.str, &local_err); } #ifdef CONFIG_RDMA - } else if (strstart(uri, "rdma:", &p)) { - rdma_start_outgoing_migration(s, p, &local_err); + } else if (channel->transport == MIGRATION_ADDRESS_TYPE_RDMA) { + rdma_start_outgoing_migration(s, &channel->u.rdma, &local_err); #endif } else if (strstart(uri, "exec:", &p)) { exec_start_outgoing_migration(s, p, &local_err); diff --git a/migration/rdma.c b/migration/rdma.c index 2938db4f64..6a29e53daf 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -289,7 +289,6 @@ typedef struct RDMALocalBlocks { typedef struct RDMAContext { char *host; int port; - char *host_port; RDMAWorkRequestData wr_data[RDMA_WRID_MAX]; @@ -2431,9 +2430,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) rdma->channel = NULL; } g_free(rdma->host); - g_free(rdma->host_port); rdma->host = NULL; - rdma->host_port = NULL; } @@ -2723,28 +2720,16 @@ static void qemu_rdma_return_path_dest_init(RDMAContext *rdma_return_path, rdma_return_path->is_return_path = true; } -static RDMAContext *qemu_rdma_data_init(const char *host_port, Error **errp) +static RDMAContext *qemu_rdma_data_init(InetSocketAddress *saddr, Error **errp) { RDMAContext *rdma = NULL; - InetSocketAddress *addr; rdma = g_new0(RDMAContext, 1); rdma->current_index = -1; rdma->current_chunk = -1; - addr = g_new(InetSocketAddress, 1); - if (!inet_parse(addr, host_port, NULL)) { - rdma->port = atoi(addr->port); - rdma->host = g_strdup(addr->host); - rdma->host_port = g_strdup(host_port); - } else { - error_setg(errp, "RDMA ERROR: bad RDMA migration address '%s'", - host_port); - g_free(rdma); - rdma = NULL; - } - - qapi_free_InetSocketAddress(addr); + rdma->host = g_strdup(saddr->host); + rdma->port = atoi(saddr->port); return rdma; } @@ -3353,6 +3338,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) .private_data_len = sizeof(cap), }; RDMAContext *rdma_return_path = NULL; + g_autoptr(InetSocketAddress) isock = g_new0(InetSocketAddress, 1); struct rdma_cm_event *cm_event; struct ibv_context *verbs; int ret; @@ -3367,13 +3353,16 @@ static int qemu_rdma_accept(RDMAContext *rdma) goto err_rdma_dest_wait; } + isock->host = rdma->host; + isock->port = g_strdup_printf("%d", rdma->port); + /* * initialize the RDMAContext for return path for postcopy after first * connection request reached. */ if ((migrate_postcopy() || migrate_return_path()) && !rdma->is_return_path) { - rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL); + rdma_return_path = qemu_rdma_data_init(isock, NULL); if (rdma_return_path == NULL) { rdma_ack_cm_event(cm_event); goto err_rdma_dest_wait; @@ -4074,7 +4063,8 @@ static void rdma_accept_incoming_migration(void *opaque) } } -void rdma_start_incoming_migration(const char *host_port, Error **errp) +void rdma_start_incoming_migration(InetSocketAddress *host_port, + Error **errp) { MigrationState *s = migrate_get_current(); int ret; @@ -4118,13 +4108,12 @@ cleanup_rdma: err: if (rdma) { g_free(rdma->host); - g_free(rdma->host_port); } g_free(rdma); } void rdma_start_outgoing_migration(void *opaque, - const char *host_port, Error **errp) + InetSocketAddress *host_port, Error **errp) { MigrationState *s = opaque; RDMAContext *rdma_return_path = NULL; diff --git a/migration/rdma.h b/migration/rdma.h index 30b15b4466..a8d27f33b8 100644 --- a/migration/rdma.h +++ b/migration/rdma.h @@ -14,15 +14,17 @@ * */ +#include "qemu/sockets.h" + #ifndef QEMU_MIGRATION_RDMA_H #define QEMU_MIGRATION_RDMA_H #include "exec/memory.h" -void rdma_start_outgoing_migration(void *opaque, const char *host_port, +void rdma_start_outgoing_migration(void *opaque, InetSocketAddress *host_port, Error **errp); -void rdma_start_incoming_migration(const char *host_port, Error **errp); +void rdma_start_incoming_migration(InetSocketAddress *host_port, Error **errp); /* * Constants used by rdma return codes From cbab4face57b829e93e935b3bf52bf9f1bea0e42 Mon Sep 17 00:00:00 2001 From: Het Gala Date: Mon, 23 Oct 2023 15:20:46 -0300 Subject: [PATCH 34/40] migration: convert exec backend to accept MigrateAddress. Exec transport backend for 'migrate'/'migrate-incoming' QAPIs accept new wire protocol of MigrateAddress struct. It is achived by parsing 'uri' string and storing migration parameters required for exec connection into strList struct. Suggested-by: Aravind Retnakaran Signed-off-by: Het Gala Signed-off-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231023182053.8711-8-farosas@suse.de> --- migration/exec.c | 73 +++++++++++++++++++++++++++++++------------ migration/exec.h | 4 +-- migration/migration.c | 8 ++--- 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/migration/exec.c b/migration/exec.c index 32f5143dfd..47d2f3b8fb 100644 --- a/migration/exec.c +++ b/migration/exec.c @@ -39,20 +39,51 @@ const char *exec_get_cmd_path(void) } #endif -void exec_start_outgoing_migration(MigrationState *s, const char *command, Error **errp) +/* provides the length of strList */ +static int +str_list_length(strList *list) +{ + int len = 0; + strList *elem; + + for (elem = list; elem != NULL; elem = elem->next) { + len++; + } + + return len; +} + +static void +init_exec_array(strList *command, char **argv, Error **errp) +{ + int i = 0; + strList *lst; + + for (lst = command; lst; lst = lst->next) { + argv[i++] = lst->value; + } + + argv[i] = NULL; + return; +} + +void exec_start_outgoing_migration(MigrationState *s, strList *command, + Error **errp) { QIOChannel *ioc; -#ifdef WIN32 - const char *argv[] = { exec_get_cmd_path(), "/c", command, NULL }; -#else - const char *argv[] = { "/bin/sh", "-c", command, NULL }; -#endif + int length = str_list_length(command); + g_auto(GStrv) argv = (char **) g_new0(const char *, length + 1); - trace_migration_exec_outgoing(command); - ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv, - O_RDWR, - errp)); + init_exec_array(command, argv, errp); + g_autofree char *new_command = g_strjoinv(" ", (char **)argv); + + trace_migration_exec_outgoing(new_command); + ioc = QIO_CHANNEL( + qio_channel_command_new_spawn( + (const char * const *) g_steal_pointer(&argv), + O_RDWR, + errp)); if (!ioc) { return; } @@ -71,20 +102,22 @@ static gboolean exec_accept_incoming_migration(QIOChannel *ioc, return G_SOURCE_REMOVE; } -void exec_start_incoming_migration(const char *command, Error **errp) +void exec_start_incoming_migration(strList *command, Error **errp) { QIOChannel *ioc; -#ifdef WIN32 - const char *argv[] = { exec_get_cmd_path(), "/c", command, NULL }; -#else - const char *argv[] = { "/bin/sh", "-c", command, NULL }; -#endif + int length = str_list_length(command); + g_auto(GStrv) argv = (char **) g_new0(const char *, length + 1); - trace_migration_exec_incoming(command); - ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv, - O_RDWR, - errp)); + init_exec_array(command, argv, errp); + g_autofree char *new_command = g_strjoinv(" ", (char **)argv); + + trace_migration_exec_incoming(new_command); + ioc = QIO_CHANNEL( + qio_channel_command_new_spawn( + (const char * const *) g_steal_pointer(&argv), + O_RDWR, + errp)); if (!ioc) { return; } diff --git a/migration/exec.h b/migration/exec.h index 736cd71028..3107f205e3 100644 --- a/migration/exec.h +++ b/migration/exec.h @@ -23,8 +23,8 @@ #ifdef WIN32 const char *exec_get_cmd_path(void); #endif -void exec_start_incoming_migration(const char *host_port, Error **errp); +void exec_start_incoming_migration(strList *host_port, Error **errp); -void exec_start_outgoing_migration(MigrationState *s, const char *host_port, +void exec_start_outgoing_migration(MigrationState *s, strList *host_port, Error **errp); #endif diff --git a/migration/migration.c b/migration/migration.c index 8c26672630..0911fb310c 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -549,8 +549,8 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) } rdma_start_incoming_migration(&channel->u.rdma, errp); #endif - } else if (strstart(uri, "exec:", &p)) { - exec_start_incoming_migration(p, errp); + } else if (channel->transport == MIGRATION_ADDRESS_TYPE_EXEC) { + exec_start_incoming_migration(channel->u.exec.args, errp); } else if (strstart(uri, "file:", &p)) { file_start_incoming_migration(p, errp); } else { @@ -1938,8 +1938,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, } else if (channel->transport == MIGRATION_ADDRESS_TYPE_RDMA) { rdma_start_outgoing_migration(s, &channel->u.rdma, &local_err); #endif - } else if (strstart(uri, "exec:", &p)) { - exec_start_outgoing_migration(s, p, &local_err); + } else if (channel->transport == MIGRATION_ADDRESS_TYPE_EXEC) { + exec_start_outgoing_migration(s, channel->u.exec.args, &local_err); } else if (strstart(uri, "file:", &p)) { file_start_outgoing_migration(s, p, &local_err); } else { From 02afba63e93729e3bdc2ffa900c4e8bc835e1950 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Mon, 23 Oct 2023 15:20:47 -0300 Subject: [PATCH 35/40] migration: Convert the file backend to the new QAPI syntax Convert the file: URI to accept a FileMigrationArgs to be compatible with the new migration QAPI. Signed-off-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231023182053.8711-9-farosas@suse.de> --- migration/file.c | 22 +++++++--------------- migration/file.h | 9 ++++++--- migration/migration.c | 10 ++++------ 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/migration/file.c b/migration/file.c index ec069ef329..5d4975f43e 100644 --- a/migration/file.c +++ b/migration/file.c @@ -36,20 +36,16 @@ int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp) return 0; } -void file_start_outgoing_migration(MigrationState *s, const char *filespec, - Error **errp) +void file_start_outgoing_migration(MigrationState *s, + FileMigrationArgs *file_args, Error **errp) { - g_autofree char *filename = g_strdup(filespec); g_autoptr(QIOChannelFile) fioc = NULL; - uint64_t offset = 0; + g_autofree char *filename = g_strdup(file_args->filename); + uint64_t offset = file_args->offset; QIOChannel *ioc; trace_migration_file_outgoing(filename); - if (file_parse_offset(filename, &offset, errp)) { - return; - } - fioc = qio_channel_file_new_path(filename, O_CREAT | O_WRONLY | O_TRUNC, 0600, errp); if (!fioc) { @@ -73,19 +69,15 @@ static gboolean file_accept_incoming_migration(QIOChannel *ioc, return G_SOURCE_REMOVE; } -void file_start_incoming_migration(const char *filespec, Error **errp) +void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp) { - g_autofree char *filename = g_strdup(filespec); + g_autofree char *filename = g_strdup(file_args->filename); QIOChannelFile *fioc = NULL; - uint64_t offset = 0; + uint64_t offset = file_args->offset; QIOChannel *ioc; trace_migration_file_incoming(filename); - if (file_parse_offset(filename, &offset, errp)) { - return; - } - fioc = qio_channel_file_new_path(filename, O_RDONLY, 0, errp); if (!fioc) { return; diff --git a/migration/file.h b/migration/file.h index 3888a57105..37d6a08bfc 100644 --- a/migration/file.h +++ b/migration/file.h @@ -7,9 +7,12 @@ #ifndef QEMU_MIGRATION_FILE_H #define QEMU_MIGRATION_FILE_H -void file_start_incoming_migration(const char *filename, Error **errp); -void file_start_outgoing_migration(MigrationState *s, const char *filename, - Error **errp); +#include "qapi/qapi-types-migration.h" + +void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp); + +void file_start_outgoing_migration(MigrationState *s, + FileMigrationArgs *file_args, Error **errp); int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp); #endif diff --git a/migration/migration.c b/migration/migration.c index 0911fb310c..04037b340c 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -508,7 +508,6 @@ static bool migrate_uri_parse(const char *uri, static void qemu_start_incoming_migration(const char *uri, Error **errp) { - const char *p = NULL; g_autoptr(MigrationAddress) channel = NULL; MigrationIncomingState *mis = migration_incoming_get_current(); @@ -551,8 +550,8 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) #endif } else if (channel->transport == MIGRATION_ADDRESS_TYPE_EXEC) { exec_start_incoming_migration(channel->u.exec.args, errp); - } else if (strstart(uri, "file:", &p)) { - file_start_incoming_migration(p, errp); + } else if (channel->transport == MIGRATION_ADDRESS_TYPE_FILE) { + file_start_incoming_migration(&channel->u.file, errp); } else { error_setg(errp, "unknown migration protocol: %s", uri); } @@ -1900,7 +1899,6 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, bool resume_requested; Error *local_err = NULL; MigrationState *s = migrate_get_current(); - const char *p = NULL; g_autoptr(MigrationAddress) channel = NULL; /* URI is not suitable for migration? */ @@ -1940,8 +1938,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, #endif } else if (channel->transport == MIGRATION_ADDRESS_TYPE_EXEC) { exec_start_outgoing_migration(s, channel->u.exec.args, &local_err); - } else if (strstart(uri, "file:", &p)) { - file_start_outgoing_migration(s, p, &local_err); + } else if (channel->transport == MIGRATION_ADDRESS_TYPE_FILE) { + file_start_outgoing_migration(s, &channel->u.file, &local_err); } else { error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol"); From 074dbce5fcce597e61ff6c44d2977458f0ba2f29 Mon Sep 17 00:00:00 2001 From: Het Gala Date: Mon, 23 Oct 2023 15:20:48 -0300 Subject: [PATCH 36/40] migration: New migrate and migrate-incoming argument 'channels' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MigrateChannelList allows to connect accross multiple interfaces. Add MigrateChannelList struct as argument to migration QAPIs. We plan to include multiple channels in future, to connnect multiple interfaces. Hence, we choose 'MigrateChannelList' as the new argument over 'MigrateChannel' to make migration QAPIs future proof. Suggested-by: Aravind Retnakaran Signed-off-by: Het Gala Acked-by: Markus Armbruster Reviewed-by: Daniel P. Berrangé Signed-off-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231023182053.8711-10-farosas@suse.de> --- migration/migration-hmp-cmds.c | 6 +- migration/migration.c | 56 ++++++++++++++-- qapi/migration.json | 113 ++++++++++++++++++++++++++++++++- system/vl.c | 2 +- 4 files changed, 167 insertions(+), 10 deletions(-) diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index a170440991..6ec778a990 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -452,7 +452,7 @@ void hmp_migrate_incoming(Monitor *mon, const QDict *qdict) Error *err = NULL; const char *uri = qdict_get_str(qdict, "uri"); - qmp_migrate_incoming(uri, &err); + qmp_migrate_incoming(uri, false, NULL, &err); hmp_handle_error(mon, err); } @@ -764,8 +764,8 @@ void hmp_migrate(Monitor *mon, const QDict *qdict) " use blockdev-mirror with NBD instead"); } - qmp_migrate(uri, !!blk, blk, !!inc, inc, - false, false, true, resume, &err); + qmp_migrate(uri, false, NULL, !!blk, blk, !!inc, inc, + false, false, true, resume, &err); if (hmp_handle_error(mon, err)) { return; } diff --git a/migration/migration.c b/migration/migration.c index 04037b340c..a9cd5e2d5d 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -506,11 +506,34 @@ static bool migrate_uri_parse(const char *uri, return true; } -static void qemu_start_incoming_migration(const char *uri, Error **errp) +static void qemu_start_incoming_migration(const char *uri, bool has_channels, + MigrationChannelList *channels, + Error **errp) { g_autoptr(MigrationAddress) channel = NULL; MigrationIncomingState *mis = migration_incoming_get_current(); + /* + * Having preliminary checks for uri and channel + */ + if (has_channels) { + error_setg(errp, "'channels' argument should not be set yet."); + return; + } + + if (uri && has_channels) { + error_setg(errp, "'uri' and 'channels' arguments are mutually " + "exclusive; exactly one of the two should be present in " + "'migrate-incoming' qmp command "); + return; + } + + if (!uri && !has_channels) { + error_setg(errp, "neither 'uri' or 'channels' argument are " + "specified in 'migrate-incoming' qmp command "); + return; + } + /* URI is not suitable for migration? */ if (!migration_channels_and_uri_compatible(uri, errp)) { return; @@ -1681,7 +1704,8 @@ void migrate_del_blocker(Error **reasonp) } } -void qmp_migrate_incoming(const char *uri, Error **errp) +void qmp_migrate_incoming(const char *uri, bool has_channels, + MigrationChannelList *channels, Error **errp) { Error *local_err = NULL; static bool once = true; @@ -1699,7 +1723,7 @@ void qmp_migrate_incoming(const char *uri, Error **errp) return; } - qemu_start_incoming_migration(uri, &local_err); + qemu_start_incoming_migration(uri, has_channels, channels, &local_err); if (local_err) { yank_unregister_instance(MIGRATION_YANK_INSTANCE); @@ -1735,7 +1759,7 @@ void qmp_migrate_recover(const char *uri, Error **errp) * only re-setup the migration stream and poke existing migration * to continue using that newly established channel. */ - qemu_start_incoming_migration(uri, errp); + qemu_start_incoming_migration(uri, false, NULL, errp); } void qmp_migrate_pause(Error **errp) @@ -1892,7 +1916,8 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return true; } -void qmp_migrate(const char *uri, bool has_blk, bool blk, +void qmp_migrate(const char *uri, bool has_channels, + MigrationChannelList *channels, bool has_blk, bool blk, bool has_inc, bool inc, bool has_detach, bool detach, bool has_resume, bool resume, Error **errp) { @@ -1901,6 +1926,27 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, MigrationState *s = migrate_get_current(); g_autoptr(MigrationAddress) channel = NULL; + /* + * Having preliminary checks for uri and channel + */ + if (has_channels) { + error_setg(errp, "'channels' argument should not be set yet."); + return; + } + + if (uri && has_channels) { + error_setg(errp, "'uri' and 'channels' arguments are mutually " + "exclusive; exactly one of the two should be present in " + "'migrate' qmp command "); + return; + } + + if (!uri && !has_channels) { + error_setg(errp, "neither 'uri' or 'channels' argument are " + "specified in 'migrate' qmp command "); + return; + } + /* URI is not suitable for migration? */ if (!migration_channels_and_uri_compatible(uri, errp)) { return; diff --git a/qapi/migration.json b/qapi/migration.json index 182b150b87..975761eebd 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1641,6 +1641,34 @@ 'rdma': 'InetSocketAddress', 'file': 'FileMigrationArgs' } } +## +# @MigrationChannelType: +# +# The migration channel-type request options. +# +# @main: Main outbound migration channel. +# +# Since 8.1 +## +{ 'enum': 'MigrationChannelType', + 'data': [ 'main' ] } + +## +# @MigrationChannel: +# +# Migration stream channel parameters. +# +# @channel-type: Channel type for transfering packet information. +# +# @addr: Migration endpoint configuration on destination interface. +# +# Since 8.1 +## +{ 'struct': 'MigrationChannel', + 'data': { + 'channel-type': 'MigrationChannelType', + 'addr': 'MigrationAddress' } } + ## # @migrate: # @@ -1648,6 +1676,9 @@ # # @uri: the Uniform Resource Identifier of the destination VM # +# @channels: list of migration stream channels with each stream in the +# list connected to a destination interface endpoint. +# # @blk: do block migration (full disk copy) # # @inc: incremental disk copy migration @@ -1677,13 +1708,57 @@ # 3. The user Monitor's "detach" argument is invalid in QMP and should # not be used # +# 4. The uri argument should have the Uniform Resource Identifier of +# default destination VM. This connection will be bound to default +# network. +# +# 5. For now, number of migration streams is restricted to one, i.e +# number of items in 'channels' list is just 1. +# +# 6. The 'uri' and 'channels' arguments are mutually exclusive; +# exactly one of the two should be present. +# # Example: # # -> { "execute": "migrate", "arguments": { "uri": "tcp:0:4446" } } # <- { "return": {} } +# -> { "execute": "migrate", +# "arguments": { +# "channels": [ { "channel-type": "main", +# "addr": { "transport": "socket", +# "type": "inet", +# "host": "10.12.34.9", +# "port": "1050" } } ] } } +# <- { "return": {} } +# +# -> { "execute": "migrate", +# "arguments": { +# "channels": [ { "channel-type": "main", +# "addr": { "transport": "exec", +# "args": [ "/bin/nc", "-p", "6000", +# "/some/sock" ] } } ] } } +# <- { "return": {} } +# +# -> { "execute": "migrate", +# "arguments": { +# "channels": [ { "channel-type": "main", +# "addr": { "transport": "rdma", +# "host": "10.12.34.9", +# "port": "1050" } } ] } } +# <- { "return": {} } +# +# -> { "execute": "migrate", +# "arguments": { +# "channels": [ { "channel-type": "main", +# "addr": { "transport": "file", +# "filename": "/tmp/migfile", +# "offset": "0x1000" } } ] } } +# <- { "return": {} } +# ## { 'command': 'migrate', 'data': {'uri': 'str', + '*channels': [ 'MigrationChannel' ], '*blk': { 'type': 'bool', 'features': [ 'deprecated' ] }, '*inc': { 'type': 'bool', 'features': [ 'deprecated' ] }, '*detach': 'bool', '*resume': 'bool' } } @@ -1697,6 +1772,9 @@ # @uri: The Uniform Resource Identifier identifying the source or # address to listen on # +# @channels: list of migration stream channels with each stream in the +# list connected to a destination interface endpoint. +# # Returns: nothing on success # # Since: 2.3 @@ -1712,13 +1790,46 @@ # # 3. The uri format is the same as for -incoming # +# 5. For now, number of migration streams is restricted to one, i.e +# number of items in 'channels' list is just 1. +# +# 4. The 'uri' and 'channels' arguments are mutually exclusive; +# exactly one of the two should be present. +# # Example: # # -> { "execute": "migrate-incoming", # "arguments": { "uri": "tcp::4446" } } # <- { "return": {} } +# +# -> { "execute": "migrate", +# "arguments": { +# "channels": [ { "channel-type": "main", +# "addr": { "transport": "socket", +# "type": "inet", +# "host": "10.12.34.9", +# "port": "1050" } } ] } } +# <- { "return": {} } +# +# -> { "execute": "migrate", +# "arguments": { +# "channels": [ { "channel-type": "main", +# "addr": { "transport": "exec", +# "args": [ "/bin/nc", "-p", "6000", +# "/some/sock" ] } } ] } } +# <- { "return": {} } +# +# -> { "execute": "migrate", +# "arguments": { +# "channels": [ { "channel-type": "main", +# "addr": { "transport": "rdma", +# "host": "10.12.34.9", +# "port": "1050" } } ] } } +# <- { "return": {} } ## -{ 'command': 'migrate-incoming', 'data': {'uri': 'str' } } +{ 'command': 'migrate-incoming', + 'data': {'*uri': 'str', + '*channels': [ 'MigrationChannel' ] } } ## # @xen-save-devices-state: diff --git a/system/vl.c b/system/vl.c index 92d29bf521..3fb569254a 100644 --- a/system/vl.c +++ b/system/vl.c @@ -2702,7 +2702,7 @@ void qmp_x_exit_preconfig(Error **errp) if (incoming) { Error *local_err = NULL; if (strcmp(incoming, "defer") != 0) { - qmp_migrate_incoming(incoming, &local_err); + qmp_migrate_incoming(incoming, false, NULL, &local_err); if (local_err) { error_reportf_err(local_err, "-incoming %s: ", incoming); exit(1); From d95533e1cdcc06c383b5ca113bfde02152cb1ab5 Mon Sep 17 00:00:00 2001 From: Het Gala Date: Mon, 23 Oct 2023 15:20:50 -0300 Subject: [PATCH 37/40] migration: modify migration_channels_and_uri_compatible() for new QAPI syntax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit migration_channels_and_uri_compatible() check for transport mechanism suitable for multifd migration gets executed when the caller calls old uri syntax. It needs it to be run when using the modern MigrateChannel QAPI syntax too. After URI -> 'MigrateChannel' : migration_channels_and_uri_compatible() -> migration_channels_and_transport_compatible() passes object as argument and check for valid transport mechanism. Suggested-by: Aravind Retnakaran Signed-off-by: Het Gala Reviewed-by: Daniel P. Berrangé Signed-off-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231023182053.8711-12-farosas@suse.de> --- migration/migration.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index a9cd5e2d5d..70725a89ab 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -128,17 +128,20 @@ static bool migration_needs_multiple_sockets(void) return migrate_multifd() || migrate_postcopy_preempt(); } -static bool uri_supports_multi_channels(const char *uri) +static bool transport_supports_multi_channels(SocketAddress *saddr) { - return strstart(uri, "tcp:", NULL) || strstart(uri, "unix:", NULL) || - strstart(uri, "vsock:", NULL); + return saddr->type == SOCKET_ADDRESS_TYPE_INET || + saddr->type == SOCKET_ADDRESS_TYPE_UNIX || + saddr->type == SOCKET_ADDRESS_TYPE_VSOCK; } static bool -migration_channels_and_uri_compatible(const char *uri, Error **errp) +migration_channels_and_transport_compatible(MigrationAddress *addr, + Error **errp) { if (migration_needs_multiple_sockets() && - !uri_supports_multi_channels(uri)) { + (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) && + !transport_supports_multi_channels(&addr->u.socket)) { error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)"); return false; } @@ -534,12 +537,12 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels, return; } - /* URI is not suitable for migration? */ - if (!migration_channels_and_uri_compatible(uri, errp)) { + if (uri && !migrate_uri_parse(uri, &channel, errp)) { return; } - if (uri && !migrate_uri_parse(uri, &channel, errp)) { + /* transport mechanism not suitable for migration? */ + if (!migration_channels_and_transport_compatible(channel, errp)) { return; } @@ -1947,12 +1950,12 @@ void qmp_migrate(const char *uri, bool has_channels, return; } - /* URI is not suitable for migration? */ - if (!migration_channels_and_uri_compatible(uri, errp)) { + if (!migrate_uri_parse(uri, &channel, errp)) { return; } - if (!migrate_uri_parse(uri, &channel, errp)) { + /* transport mechanism not suitable for migration? */ + if (!migration_channels_and_transport_compatible(channel, errp)) { return; } From 5994024fd132cf2eb53ca2385fd198b1bd33d446 Mon Sep 17 00:00:00 2001 From: Het Gala Date: Mon, 23 Oct 2023 15:20:51 -0300 Subject: [PATCH 38/40] migration: Implement MigrateChannelList to qmp migration flow. Integrate MigrateChannelList with all transport backends (socket, exec and rdma) for both src and dest migration endpoints for qmp migration. For current series, limit the size of MigrateChannelList to single element (single interface) as runtime check. Suggested-by: Aravind Retnakaran Signed-off-by: Het Gala Signed-off-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231023182053.8711-13-farosas@suse.de> --- migration/migration.c | 101 +++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 46 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 70725a89ab..10a6f2fb12 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -458,9 +458,10 @@ void migrate_add_address(SocketAddress *address) } static bool migrate_uri_parse(const char *uri, - MigrationAddress **channel, + MigrationChannel **channel, Error **errp) { + g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1); g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1); SocketAddress *saddr = NULL; InetSocketAddress *isock = &addr->u.rdma; @@ -505,7 +506,9 @@ static bool migrate_uri_parse(const char *uri, return false; } - *channel = g_steal_pointer(&addr); + val->channel_type = MIGRATION_CHANNEL_TYPE_MAIN; + val->addr = g_steal_pointer(&addr); + *channel = g_steal_pointer(&val); return true; } @@ -513,44 +516,47 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels, MigrationChannelList *channels, Error **errp) { - g_autoptr(MigrationAddress) channel = NULL; + MigrationChannel *channel = NULL; + MigrationAddress *addr = NULL; MigrationIncomingState *mis = migration_incoming_get_current(); /* * Having preliminary checks for uri and channel */ - if (has_channels) { - error_setg(errp, "'channels' argument should not be set yet."); - return; - } - if (uri && has_channels) { error_setg(errp, "'uri' and 'channels' arguments are mutually " "exclusive; exactly one of the two should be present in " "'migrate-incoming' qmp command "); return; - } - - if (!uri && !has_channels) { + } else if (channels) { + /* To verify that Migrate channel list has only item */ + if (channels->next) { + error_setg(errp, "Channel list has more than one entries"); + return; + } + channel = channels->value; + } else if (uri) { + /* caller uses the old URI syntax */ + if (!migrate_uri_parse(uri, &channel, errp)) { + return; + } + } else { error_setg(errp, "neither 'uri' or 'channels' argument are " "specified in 'migrate-incoming' qmp command "); return; } - - if (uri && !migrate_uri_parse(uri, &channel, errp)) { - return; - } + addr = channel->addr; /* transport mechanism not suitable for migration? */ - if (!migration_channels_and_transport_compatible(channel, errp)) { + if (!migration_channels_and_transport_compatible(addr, errp)) { return; } migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); - if (channel->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { - SocketAddress *saddr = &channel->u.socket; + if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { + SocketAddress *saddr = &addr->u.socket; if (saddr->type == SOCKET_ADDRESS_TYPE_INET || saddr->type == SOCKET_ADDRESS_TYPE_UNIX || saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) { @@ -559,7 +565,7 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels, fd_start_incoming_migration(saddr->u.fd.str, errp); } #ifdef CONFIG_RDMA - } else if (channel->transport == MIGRATION_ADDRESS_TYPE_RDMA) { + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) { if (migrate_compress()) { error_setg(errp, "RDMA and compression can't be used together"); return; @@ -572,12 +578,12 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels, error_setg(errp, "RDMA and multifd can't be used together"); return; } - rdma_start_incoming_migration(&channel->u.rdma, errp); + rdma_start_incoming_migration(&addr->u.rdma, errp); #endif - } else if (channel->transport == MIGRATION_ADDRESS_TYPE_EXEC) { - exec_start_incoming_migration(channel->u.exec.args, errp); - } else if (channel->transport == MIGRATION_ADDRESS_TYPE_FILE) { - file_start_incoming_migration(&channel->u.file, errp); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) { + exec_start_incoming_migration(addr->u.exec.args, errp); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { + file_start_incoming_migration(&addr->u.file, errp); } else { error_setg(errp, "unknown migration protocol: %s", uri); } @@ -1927,35 +1933,38 @@ void qmp_migrate(const char *uri, bool has_channels, bool resume_requested; Error *local_err = NULL; MigrationState *s = migrate_get_current(); - g_autoptr(MigrationAddress) channel = NULL; + MigrationChannel *channel = NULL; + MigrationAddress *addr = NULL; /* * Having preliminary checks for uri and channel */ - if (has_channels) { - error_setg(errp, "'channels' argument should not be set yet."); - return; - } - if (uri && has_channels) { error_setg(errp, "'uri' and 'channels' arguments are mutually " "exclusive; exactly one of the two should be present in " "'migrate' qmp command "); return; - } - - if (!uri && !has_channels) { + } else if (channels) { + /* To verify that Migrate channel list has only item */ + if (channels->next) { + error_setg(errp, "Channel list has more than one entries"); + return; + } + channel = channels->value; + } else if (uri) { + /* caller uses the old URI syntax */ + if (!migrate_uri_parse(uri, &channel, errp)) { + return; + } + } else { error_setg(errp, "neither 'uri' or 'channels' argument are " "specified in 'migrate' qmp command "); return; } - - if (!migrate_uri_parse(uri, &channel, errp)) { - return; - } + addr = channel->addr; /* transport mechanism not suitable for migration? */ - if (!migration_channels_and_transport_compatible(channel, errp)) { + if (!migration_channels_and_transport_compatible(addr, errp)) { return; } @@ -1972,8 +1981,8 @@ void qmp_migrate(const char *uri, bool has_channels, } } - if (channel->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { - SocketAddress *saddr = &channel->u.socket; + if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { + SocketAddress *saddr = &addr->u.socket; if (saddr->type == SOCKET_ADDRESS_TYPE_INET || saddr->type == SOCKET_ADDRESS_TYPE_UNIX || saddr->type == SOCKET_ADDRESS_TYPE_VSOCK) { @@ -1982,13 +1991,13 @@ void qmp_migrate(const char *uri, bool has_channels, fd_start_outgoing_migration(s, saddr->u.fd.str, &local_err); } #ifdef CONFIG_RDMA - } else if (channel->transport == MIGRATION_ADDRESS_TYPE_RDMA) { - rdma_start_outgoing_migration(s, &channel->u.rdma, &local_err); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) { + rdma_start_outgoing_migration(s, &addr->u.rdma, &local_err); #endif - } else if (channel->transport == MIGRATION_ADDRESS_TYPE_EXEC) { - exec_start_outgoing_migration(s, channel->u.exec.args, &local_err); - } else if (channel->transport == MIGRATION_ADDRESS_TYPE_FILE) { - file_start_outgoing_migration(s, &channel->u.file, &local_err); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) { + exec_start_outgoing_migration(s, addr->u.exec.args, &local_err); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { + file_start_outgoing_migration(s, &addr->u.file, &local_err); } else { error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol"); From 967f2de5c9ec224480b940d5c9fc207ca625b7d8 Mon Sep 17 00:00:00 2001 From: Het Gala Date: Mon, 23 Oct 2023 15:20:52 -0300 Subject: [PATCH 39/40] migration: Implement MigrateChannelList to hmp migration flow. Integrate MigrateChannelList with all transport backends (socket, exec and rdma) for both src and dest migration endpoints for hmp migration. Suggested-by: Aravind Retnakaran Signed-off-by: Het Gala Signed-off-by: Fabiano Rosas Message-ID: <20231023182053.8711-14-farosas@suse.de> Signed-off-by: Juan Quintela --- migration/migration-hmp-cmds.c | 23 +++++++++++++++++++++-- migration/migration.c | 5 ++--- migration/migration.h | 3 ++- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index 6ec778a990..86ae832176 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -451,9 +451,18 @@ void hmp_migrate_incoming(Monitor *mon, const QDict *qdict) { Error *err = NULL; const char *uri = qdict_get_str(qdict, "uri"); + MigrationChannelList *caps = NULL; + g_autoptr(MigrationChannel) channel = NULL; - qmp_migrate_incoming(uri, false, NULL, &err); + if (!migrate_uri_parse(uri, &channel, &err)) { + goto end; + } + QAPI_LIST_PREPEND(caps, g_steal_pointer(&channel)); + qmp_migrate_incoming(NULL, true, caps, &err); + qapi_free_MigrationChannelList(caps); + +end: hmp_handle_error(mon, err); } @@ -753,6 +762,8 @@ void hmp_migrate(Monitor *mon, const QDict *qdict) bool resume = qdict_get_try_bool(qdict, "resume", false); const char *uri = qdict_get_str(qdict, "uri"); Error *err = NULL; + MigrationChannelList *caps = NULL; + g_autoptr(MigrationChannel) channel = NULL; if (inc) { warn_report("option '-i' is deprecated;" @@ -764,12 +775,20 @@ void hmp_migrate(Monitor *mon, const QDict *qdict) " use blockdev-mirror with NBD instead"); } - qmp_migrate(uri, false, NULL, !!blk, blk, !!inc, inc, + if (!migrate_uri_parse(uri, &channel, &err)) { + hmp_handle_error(mon, err); + return; + } + QAPI_LIST_PREPEND(caps, g_steal_pointer(&channel)); + + qmp_migrate(NULL, true, caps, !!blk, blk, !!inc, inc, false, false, true, resume, &err); if (hmp_handle_error(mon, err)) { return; } + qapi_free_MigrationChannelList(caps); + if (!detach) { HMPMigrationStatus *status; diff --git a/migration/migration.c b/migration/migration.c index 10a6f2fb12..28a34c9068 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -457,9 +457,8 @@ void migrate_add_address(SocketAddress *address) QAPI_CLONE(SocketAddress, address)); } -static bool migrate_uri_parse(const char *uri, - MigrationChannel **channel, - Error **errp) +bool migrate_uri_parse(const char *uri, MigrationChannel **channel, + Error **errp) { g_autoptr(MigrationChannel) val = g_new0(MigrationChannel, 1); g_autoptr(MigrationAddress) addr = g_new0(MigrationAddress, 1); diff --git a/migration/migration.h b/migration/migration.h index af8c965b7f..cf2c9c88e0 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -520,7 +520,8 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, Error **errp); void migrate_add_address(SocketAddress *address); - +bool migrate_uri_parse(const char *uri, MigrationChannel **channel, + Error **errp); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); #define qemu_ram_foreach_block \ From 8e3766eefbb4036cbc280c1f1a0d28537929f7fb Mon Sep 17 00:00:00 2001 From: Het Gala Date: Mon, 23 Oct 2023 15:20:53 -0300 Subject: [PATCH 40/40] migration: modify test_multifd_tcp_none() to use new QAPI syntax. modify multifd tcp common test to incorporate the new QAPI syntax defined. Suggested-by: Aravind Retnakaran Signed-off-by: Het Gala Signed-off-by: Fabiano Rosas Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela Message-ID: <20231023182053.8711-15-farosas@suse.de> --- tests/qtest/migration-test.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 047b7194df..e803b46039 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -1310,7 +1310,12 @@ static int migrate_postcopy_prepare(QTestState **from_ptr, migrate_prepare_for_dirty_mem(from); qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming'," - " 'arguments': { 'uri': 'tcp:127.0.0.1:0' }}"); + " 'arguments': { " + " 'channels': [ { 'channel-type': 'main'," + " 'addr': { 'transport': 'socket'," + " 'type': 'inet'," + " 'host': '127.0.0.1'," + " 'port': '0' } } ] } }"); /* Wait for the first serial output from the source */ wait_for_serial("src_serial");