From cd523a4181b152004fc0aed18381aefe600ac38e Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 2 Nov 2021 16:51:57 +0100 Subject: [PATCH 01/76] net/vhost-vdpa: fix memory leak in vhost_vdpa_get_max_queue_pairs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use g_autofree to ensure that `config` is freed when vhost_vdpa_get_max_queue_pairs() returns. Reported-by: Coverity (CID 1465228: RESOURCE_LEAK) Fixes: 402378407d ("vhost-vdpa: multiqueue support") Signed-off-by: Stefano Garzarella Message-Id: <20211102155157.241034-1-sgarzare@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé Acked-by: Jason Wang --- net/vhost-vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 49ab322511..373b706b90 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -214,7 +214,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) { unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); - struct vhost_vdpa_config *config; + g_autofree struct vhost_vdpa_config *config = NULL; __virtio16 *max_queue_pairs; uint64_t features; int ret; From b66cecb238d065628b85a18357d28d21618a4580 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 Nov 2021 16:33:42 +0000 Subject: [PATCH 02/76] softmmu/qdev-monitor: fix use-after-free in qdev_set_id() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by Coverity (CID 1465222). Fixes: 4a1d937796de0fecd8b22d7dbebf87f38e8282fd ("softmmu/qdev-monitor: add error handling in qdev_set_id") Cc: Damien Hedde Cc: Kevin Wolf Cc: Michael S. Tsirkin Signed-off-by: Stefan Hajnoczi Message-Id: <20211102163342.31162-1-stefanha@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Kevin Wolf Reviewed-by: Marc-André Lureau Reviewed-by: Damien Hedde Reviewed-by: Markus Armbruster --- softmmu/qdev-monitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index f8b3a4cd82..588a62b88d 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -593,8 +593,8 @@ const char *qdev_set_id(DeviceState *dev, char *id, Error **errp) if (prop) { dev->id = id; } else { - g_free(id); error_setg(errp, "Duplicate device ID '%s'", id); + g_free(id); return NULL; } } else { From 245cf2c24eb0d5a4fe75151e1794aa5cd53b130d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 4 Nov 2021 09:56:24 +0100 Subject: [PATCH 03/76] vhost: Rename last_index to vq_index_end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The doc of this field pointed out that last_index is the last vq index. This is misleading, since it's actually one past the end of the vqs. Renaming and modifying comment. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20211104085625.2054959-2-eperezma@redhat.com> Reviewed-by: Juan Quintela Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 4 ++-- hw/virtio/vhost-vdpa.c | 2 +- include/hw/virtio/vhost.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 0d888f29a6..29f2c4212f 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -232,10 +232,10 @@ fail: } static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index, - int last_index) + int vq_index_end) { net->dev.vq_index = vq_index; - net->dev.last_index = last_index; + net->dev.vq_index_end = vq_index_end; } static int vhost_net_start_one(struct vhost_net *net, diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 0d8051426c..bcaf00e09f 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -645,7 +645,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); } - if (dev->vq_index + dev->nvqs != dev->last_index) { + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { return 0; } diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 3fa0b554ef..58a73e7b7a 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -74,8 +74,8 @@ struct vhost_dev { unsigned int nvqs; /* the first virtqueue which would be used by this vhost dev */ int vq_index; - /* the last vq index for the virtio device (not vhost) */ - int last_index; + /* one past the last vq index for the virtio device (not vhost) */ + int vq_index_end; /* if non-zero, minimum required value for max_queues */ int num_queues; uint64_t features; From 14c81b2191fffb61cd6d2a7a839d34ec2d5e09a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 4 Nov 2021 09:56:25 +0100 Subject: [PATCH 04/76] vhost: Fix last vq queue index of devices with no cvq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The -1 assumes that cvq device model is accounted in data_queue_pairs, if cvq does not exists, but it's actually the opposite: Devices with !cvq are ok but devices with cvq does not add the last queue to data_queue_pairs. This is not a problem to vhost-net, but it is to vhost-vdpa: * Devices with cvq gets initialized at last data vq device model, not at cvq one. * Devices with !cvq never gets initialized, since last_index is the first queue of the last device model. Because of that, the right change in last_index is to actually add the cvq, not to remove the missing one. This is not a problem to vhost-net, but it is to vhost-vdpa, which device model trust to reach the last index to finish starting the device. Also, as the previous commit, rename it to index_end. Tested with vp_vdpa with host's vhost=on and vhost=off, with ctrl_vq=on and ctrl_vq=off. Fixes: 049eb15b5fc9 ("vhost: record the last virtqueue index for the virtio device") Reviewed-by: Juan Quintela Signed-off-by: Eugenio Pérez Message-Id: <20211104085625.2054959-3-eperezma@redhat.com> Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 29f2c4212f..30379d2ca4 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -326,11 +326,11 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, VirtIONet *n = VIRTIO_NET(dev); int nvhosts = data_queue_pairs + cvq; struct vhost_net *net; - int r, e, i, last_index = data_queue_pairs * 2; + int r, e, i, index_end = data_queue_pairs * 2; NetClientState *peer; - if (!cvq) { - last_index -= 1; + if (cvq) { + index_end += 1; } if (!k->set_guest_notifiers) { @@ -347,7 +347,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } net = get_vhost_net(peer); - vhost_net_set_vq_index(net, i * 2, last_index); + vhost_net_set_vq_index(net, i * 2, index_end); /* Suppress the masking guest notifiers on vhost user * because vhost user doesn't interrupt masking/unmasking From be81ba604260f4beae1522241c579eeda1b891fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 6 Nov 2021 15:50:16 +0100 Subject: [PATCH 05/76] hw/mem/pc-dimm: Restrict NUMA-specific code to NUMA machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When trying to use the pc-dimm device on a non-NUMA machine, we get: $ qemu-system-arm -M none -cpu max -S \ -object memory-backend-file,id=mem1,size=1M,mem-path=/tmp/1m \ -device pc-dimm,id=dimm1,memdev=mem1 Segmentation fault (core dumped) (gdb) bt #0 pc_dimm_realize (dev=0x555556da3e90, errp=0x7fffffffcd10) at hw/mem/pc-dimm.c:184 #1 0x0000555555fe1f8f in device_set_realized (obj=0x555556da3e90, value=true, errp=0x7fffffffce18) at hw/core/qdev.c:531 #2 0x0000555555feb4a9 in property_set_bool (obj=0x555556da3e90, v=0x555556e54420, name=0x5555563c3c41 "realized", opaque=0x555556a704f0, errp=0x7fffffffce18) at qom/object.c:2257 To avoid that crash, restrict the pc-dimm NUMA check to machines supporting NUMA, and do not allow the use of 'node' property on non-NUMA machines. Suggested-by: Igor Mammedov Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20211106145016.611332-1-f4bug@amsat.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/pc-dimm.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index a3a2560301..48b913aba6 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -181,7 +181,21 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp) PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MachineState *ms = MACHINE(qdev_get_machine()); - int nb_numa_nodes = ms->numa_state->num_nodes; + + if (ms->numa_state) { + int nb_numa_nodes = ms->numa_state->num_nodes; + + if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || + (!nb_numa_nodes && dimm->node)) { + error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" + PRIu32 "' which exceeds the number of numa nodes: %d", + dimm->node, nb_numa_nodes ? nb_numa_nodes : 1); + return; + } + } else if (dimm->node > 0) { + error_setg(errp, "machine doesn't support NUMA"); + return; + } if (!dimm->hostmem) { error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set"); @@ -191,13 +205,6 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp) object_get_canonical_path_component(OBJECT(dimm->hostmem))); return; } - if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || - (!nb_numa_nodes && dimm->node)) { - error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" - PRIu32 "' which exceeds the number of numa nodes: %d", - dimm->node, nb_numa_nodes ? nb_numa_nodes : 1); - return; - } if (ddc->realize) { ddc->realize(dimm, errp); From 2aa1842d6d79dcd1b84c58eeb44591a99a9e56df Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 12 Nov 2021 06:08:53 -0500 Subject: [PATCH 06/76] pcie: rename 'native-hotplug' to 'x-native-hotplug' Mark property as experimental/internal adding 'x-' prefix. Property was introduced in 6.1 and it should have provided ability to turn on native PCIE hotplug on port even when ACPI PCI hotplug is in use is user explicitly sets property on CLI. However that never worked since slot is wired to ACPI hotplug controller. Another non-intended usecase: disable native hotplug on slot when APCI based hotplug is disabled, which works but slot has 'hotplug' property for this taks. It should be relatively safe to rename it to experimental as no users should exist for it and given that the property is broken we don't really want to leave it around for much longer lest users start using it. Signed-off-by: Igor Mammedov Reviewed-by: Ani Sinha Message-Id: <20211112110857.3116853-2-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_q35.c | 2 +- hw/pci/pcie_port.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 797e09500b..fc34b905ee 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -243,7 +243,7 @@ static void pc_q35_init(MachineState *machine) NULL); if (acpi_pcihp) { - object_register_sugar_prop(TYPE_PCIE_SLOT, "native-hotplug", + object_register_sugar_prop(TYPE_PCIE_SLOT, "x-native-hotplug", "false", true); } diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index da850e8dde..e95c1e5519 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -148,7 +148,7 @@ static Property pcie_slot_props[] = { DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true), - DEFINE_PROP_BOOL("native-hotplug", PCIESlot, native_hotplug, true), + DEFINE_PROP_BOOL("x-native-hotplug", PCIESlot, native_hotplug, true), DEFINE_PROP_END_OF_LIST() }; From c318bef76206c2ecb6016e8e68c4ac6ff9a4c8cb Mon Sep 17 00:00:00 2001 From: Julia Suvorova Date: Fri, 12 Nov 2021 06:08:54 -0500 Subject: [PATCH 07/76] hw/acpi/ich9: Add compat prop to keep HPC bit set for 6.1 machine type To solve issues [1-2] the Hot Plug Capable bit in PCIe Slots will be turned on, while the switch to ACPI Hot-plug will be done in the DSDT table. Introducing 'x-keep-native-hpc' property disables the HPC bit only in 6.1 and as a result keeps the forced 'reserve-io' on pcie-root-ports in 6.1 too. [1] https://gitlab.com/qemu-project/qemu/-/issues/641 [2] https://bugzilla.redhat.com/show_bug.cgi?id=2006409 Signed-off-by: Julia Suvorova Signed-off-by: Igor Mammedov Message-Id: <20211112110857.3116853-3-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ich9.c | 18 ++++++++++++++++++ hw/i386/pc.c | 2 ++ hw/i386/pc_q35.c | 7 ++++++- include/hw/acpi/ich9.h | 1 + 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 1ee2ba2c50..ebe08ed831 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -419,6 +419,20 @@ static void ich9_pm_set_acpi_pci_hotplug(Object *obj, bool value, Error **errp) s->pm.use_acpi_hotplug_bridge = value; } +static bool ich9_pm_get_keep_pci_slot_hpc(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.keep_pci_slot_hpc; +} + +static void ich9_pm_set_keep_pci_slot_hpc(Object *obj, bool value, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.keep_pci_slot_hpc = value; +} + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; @@ -428,6 +442,7 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) pm->disable_s4 = 0; pm->s4_val = 2; pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, &pm->pm_io_base, OBJ_PROP_FLAG_READ); @@ -454,6 +469,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) object_property_add_bool(obj, ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, ich9_pm_get_acpi_pci_hotplug, ich9_pm_set_acpi_pci_hotplug); + object_property_add_bool(obj, "x-keep-pci-slot-hpc", + ich9_pm_get_keep_pci_slot_hpc, + ich9_pm_set_keep_pci_slot_hpc); } void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 2592a82148..a2ef40ecbc 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -98,6 +98,7 @@ GlobalProperty pc_compat_6_1[] = { { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, + { "ICH9-LPC", "x-keep-pci-slot-hpc", "false" }, }; const size_t pc_compat_6_1_len = G_N_ELEMENTS(pc_compat_6_1); @@ -107,6 +108,7 @@ GlobalProperty pc_compat_6_0[] = { { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, + { "ICH9-LPC", "x-keep-pci-slot-hpc", "true" }, }; const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index fc34b905ee..e1e100316d 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -137,6 +137,7 @@ static void pc_q35_init(MachineState *machine) DriveInfo *hd[MAX_SATA_PORTS]; MachineClass *mc = MACHINE_GET_CLASS(machine); bool acpi_pcihp; + bool keep_pci_slot_hpc; /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping @@ -242,7 +243,11 @@ static void pc_q35_init(MachineState *machine) ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, NULL); - if (acpi_pcihp) { + keep_pci_slot_hpc = object_property_get_bool(OBJECT(lpc), + "x-keep-pci-slot-hpc", + NULL); + + if (!keep_pci_slot_hpc && acpi_pcihp) { object_register_sugar_prop(TYPE_PCIE_SLOT, "x-native-hotplug", "false", true); } diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index f04f1791bd..7ca92843c6 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -56,6 +56,7 @@ typedef struct ICH9LPCPMRegs { AcpiCpuHotplug gpe_cpu; CPUHotplugState cpuhp_state; + bool keep_pci_slot_hpc; bool use_acpi_hotplug_bridge; AcpiPciHpState acpi_pci_hotplug; MemHotplugState acpi_memory_hotplug; From be12e3a016f10f4daa1a248df97198f4b10ef8c4 Mon Sep 17 00:00:00 2001 From: Julia Suvorova Date: Fri, 12 Nov 2021 06:08:55 -0500 Subject: [PATCH 08/76] bios-tables-test: Allow changes in DSDT ACPI tables Prepare for changing the _OSC method in q35 DSDT. Signed-off-by: Julia Suvorova Signed-off-by: Igor Mammedov Acked-by: Ani Sinha Message-Id: <20211112110857.3116853-4-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..48e5634d4b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,17 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/q35/DSDT", +"tests/data/acpi/q35/DSDT.tis", +"tests/data/acpi/q35/DSDT.bridge", +"tests/data/acpi/q35/DSDT.mmio64", +"tests/data/acpi/q35/DSDT.ipmibt", +"tests/data/acpi/q35/DSDT.cphp", +"tests/data/acpi/q35/DSDT.memhp", +"tests/data/acpi/q35/DSDT.acpihmat", +"tests/data/acpi/q35/DSDT.numamem", +"tests/data/acpi/q35/DSDT.dimmpxm", +"tests/data/acpi/q35/DSDT.nohpet", +"tests/data/acpi/q35/DSDT.tis.tpm2", +"tests/data/acpi/q35/DSDT.tis.tpm12", +"tests/data/acpi/q35/DSDT.multi-bridge", +"tests/data/acpi/q35/DSDT.ivrs", +"tests/data/acpi/q35/DSDT.xapic", From 211afe5c69b597acf85fdd577eb497f5be1ffbd8 Mon Sep 17 00:00:00 2001 From: Julia Suvorova Date: Fri, 12 Nov 2021 06:08:56 -0500 Subject: [PATCH 09/76] hw/i386/acpi-build: Deny control on PCIe Native Hot-plug in _OSC There are two ways to enable ACPI PCI Hot-plug: * Disable the Hot-plug Capable bit on PCIe slots. This was the first approach which led to regression [1-2], as I/O space for a port is allocated only when it is hot-pluggable, which is determined by HPC bit. * Leave the HPC bit on and disable PCIe Native Hot-plug in _OSC method. This removes the (future) ability of hot-plugging switches with PCIe Native hotplug since ACPI PCI Hot-plug only works with cold-plugged bridges. If the user wants to explicitely use this feature, they can disable ACPI PCI Hot-plug with: --global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off Change the bit in _OSC method so that the OS selects ACPI PCI Hot-plug instead of PCIe Native. [1] https://gitlab.com/qemu-project/qemu/-/issues/641 [2] https://bugzilla.redhat.com/show_bug.cgi?id=2006409 Signed-off-by: Julia Suvorova Signed-off-by: Igor Mammedov Message-Id: <20211112110857.3116853-5-imammedo@redhat.com> Reviewed-by: Ani Sinha Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a3ad6abd33..a99c6e4fe3 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1337,7 +1337,7 @@ static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) aml_append(table, scope); } -static Aml *build_q35_osc_method(void) +static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug) { Aml *if_ctx; Aml *if_ctx2; @@ -1359,8 +1359,10 @@ static Aml *build_q35_osc_method(void) /* * Always allow native PME, AER (no dependencies) * Allow SHPC (PCI bridges can have SHPC controller) + * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled. */ - aml_append(if_ctx, aml_and(a_ctrl, aml_int(0x1F), a_ctrl)); + aml_append(if_ctx, aml_and(a_ctrl, + aml_int(0x1E | (enable_native_pcie_hotplug ? 0x1 : 0x0)), a_ctrl)); if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); /* Unknown revision */ @@ -1449,7 +1451,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(dev, build_q35_osc_method()); + aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en)); aml_append(sb_scope, dev); if (mcfg_valid) { aml_append(sb_scope, build_q35_dram_controller(&mcfg)); @@ -1565,7 +1567,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, if (pci_bus_is_express(bus)) { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); - aml_append(dev, build_q35_osc_method()); + + /* Expander bridges do not have ACPI PCI Hot-plug enabled */ + aml_append(dev, build_q35_osc_method(true)); } else { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); } From 7e6055c99f2f1f5e6a206e8d07a9f45508832611 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 12 Nov 2021 06:08:57 -0500 Subject: [PATCH 10/76] tests: bios-tables-test update expected blobs The changes are the result of 'hw/i386/acpi-build: Deny control on PCIe Native Hot-Plug in _OSC' which hides PCIE hotplug bit in host-bridge _OSC Method (_OSC, 4, NotSerialized) // _OSC: Operating System Capabilities { CreateDWordField (Arg3, Zero, CDW1) If ((Arg0 == ToUUID ("33db4d5b-1ff7-401c-9657-7441c03dd766") /* PCI Host Bridge Device */)) { CreateDWordField (Arg3, 0x04, CDW2) CreateDWordField (Arg3, 0x08, CDW3) Local0 = CDW3 /* \_SB_.PCI0._OSC.CDW3 */ - Local0 &= 0x1F + Local0 &= 0x1E Signed-off-by: Igor Mammedov Message-Id: <20211112110857.3116853-6-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/q35/DSDT | Bin 8289 -> 8289 bytes tests/data/acpi/q35/DSDT.acpihmat | Bin 9614 -> 9614 bytes tests/data/acpi/q35/DSDT.bridge | Bin 11003 -> 11003 bytes tests/data/acpi/q35/DSDT.cphp | Bin 8753 -> 8753 bytes tests/data/acpi/q35/DSDT.dimmpxm | Bin 9943 -> 9943 bytes tests/data/acpi/q35/DSDT.ipmibt | Bin 8364 -> 8364 bytes tests/data/acpi/q35/DSDT.ivrs | Bin 8306 -> 8306 bytes tests/data/acpi/q35/DSDT.memhp | Bin 9648 -> 9648 bytes tests/data/acpi/q35/DSDT.mmio64 | Bin 9419 -> 9419 bytes tests/data/acpi/q35/DSDT.multi-bridge | Bin 8583 -> 8583 bytes tests/data/acpi/q35/DSDT.nohpet | Bin 8147 -> 8147 bytes tests/data/acpi/q35/DSDT.numamem | Bin 8295 -> 8295 bytes tests/data/acpi/q35/DSDT.tis.tpm12 | Bin 8894 -> 8894 bytes tests/data/acpi/q35/DSDT.tis.tpm2 | Bin 8894 -> 8894 bytes tests/data/acpi/q35/DSDT.xapic | Bin 35652 -> 35652 bytes tests/qtest/bios-tables-test-allowed-diff.h | 16 ---------------- 16 files changed, 16 deletions(-) diff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT index 281fc82c03b2562d2e6b7caec0d817b034a47138..c1965f6051ef2af81dd8412abe169d87845bb033 100644 GIT binary patch delta 24 gcmaFp@X&$FCDET<0BnZ{w*UYD delta 24 gcmaFp@X&$FCDET<0BnK?w*UYD diff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat index 8c1e05a11a328ec1cc6f86e36e52c28f41f9744e..f24d4874bff8d327a165ed7c36de507aea114edd 100644 GIT binary patch delta 24 fcmeD4?(^ny33dtTQ)OUa+&+=(3ZvY{`|DKzU@Hhn delta 24 fcmeD4?(^ny33dtTQ)OUa+%}Qx3ZwkS`|DKzU?vDi diff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge index 6f1464b6c712d7f33cb4b891b7ce76fe228f44c9..424d51bd1cb39ea73501ef7d0044ee52cec5bdac 100644 GIT binary patch delta 24 gcmewz`a6`%CDF$oF>WH)6-K#@_k$DxTWtqt delta 24 fcmdn!veAXhCDF$oF?J%?6-N1u_k$DxTWAMo diff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm index fe5820d93d057ef09a001662369b15afbc5b87e2..76e451e829ec4c245315f7eed8731aa1be45a747 100644 GIT binary patch delta 24 gcmccad)=4ICDD~$3R?@yKo0BrFHn*aa+ diff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp index 9bc11518fc57687ca789dc70793b48b29a0d74ed..4e9cb3dc6896bb79ccac0fe342a404549f6610e8 100644 GIT binary patch delta 24 gcmdnsy}_HyCD7Sg;8$f{S^uTTRaEr delta 24 fcmZp7Zg=K#33dr-S7cyd?48JUg;9Rv{S^uTTQ>*m diff --git a/tests/data/acpi/q35/DSDT.nohpet b/tests/data/acpi/q35/DSDT.nohpet index e8202e6ddfbe96071f32f1ec05758f650569943e..83d1aa00ac5686df479673fb0d7830f946e25dea 100644 GIT binary patch delta 24 gcmca?f7zbPCDn+a delta 24 gcmaFv@Z5pRCDn+a diff --git a/tests/data/acpi/q35/DSDT.tis.tpm12 b/tests/data/acpi/q35/DSDT.tis.tpm12 index c96b5277a14ae98174408d690d6e0246bd932623..0ebdf6fbd77967f1ab5d5337b7b1fed314cfaca8 100644 GIT binary patch delta 24 gcmdnzy3du%CDyjp@iVCN7s?mk^h31_s6r6S=N1%5A)#+64f6_X&Rh delta 26 icmX>yjp@iVCN7s?mk^h31_s9U6S=N1%5S`%+64f6@(F(c diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index 48e5634d4b..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,17 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/q35/DSDT", -"tests/data/acpi/q35/DSDT.tis", -"tests/data/acpi/q35/DSDT.bridge", -"tests/data/acpi/q35/DSDT.mmio64", -"tests/data/acpi/q35/DSDT.ipmibt", -"tests/data/acpi/q35/DSDT.cphp", -"tests/data/acpi/q35/DSDT.memhp", -"tests/data/acpi/q35/DSDT.acpihmat", -"tests/data/acpi/q35/DSDT.numamem", -"tests/data/acpi/q35/DSDT.dimmpxm", -"tests/data/acpi/q35/DSDT.nohpet", -"tests/data/acpi/q35/DSDT.tis.tpm2", -"tests/data/acpi/q35/DSDT.tis.tpm12", -"tests/data/acpi/q35/DSDT.multi-bridge", -"tests/data/acpi/q35/DSDT.ivrs", -"tests/data/acpi/q35/DSDT.xapic", From f463e761a41ee71e59892121e1c74d9c25c985d2 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 11 Nov 2021 14:38:53 +0800 Subject: [PATCH 11/76] virtio: use virtio accessor to access packed descriptor flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to access packed descriptor flags via address_space_{write|read}_cached(). When we hit the cache, memcpy() is used which is not an atomic operation which may lead a wrong value is read or wrote. So this patch switches to use virito_{stw|lduw}_phys_cached() to make sure the aceess is atomic. Fixes: 86044b24e865f ("virtio: basic packed virtqueue support") Cc: qemu-stable@nongnu.org Signed-off-by: Jason Wang Message-Id: <20211111063854.29060-1-jasowang@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index cc69a9b881..939bcbfeb9 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -507,11 +507,9 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev, MemoryRegionCache *cache, int i) { - address_space_read_cached(cache, - i * sizeof(VRingPackedDesc) + - offsetof(VRingPackedDesc, flags), - flags, sizeof(*flags)); - virtio_tswap16s(vdev, flags); + hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); + + *flags = virtio_lduw_phys_cached(vdev, cache, off); } static void vring_packed_desc_read(VirtIODevice *vdev, @@ -564,8 +562,7 @@ static void vring_packed_desc_write_flags(VirtIODevice *vdev, { hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); - virtio_tswap16s(vdev, &desc->flags); - address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags)); + virtio_stw_phys_cached(vdev, cache, off, desc->flags); address_space_cache_invalidate(cache, off, sizeof(desc->flags)); } From d152cdd6f6fad381e804c8185f0ba938030ccac9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 11 Nov 2021 14:38:54 +0800 Subject: [PATCH 12/76] virtio: use virtio accessor to access packed event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to access packed descriptor event and off_wrap via address_space_{write|read}_cached(). When we hit the cache, memcpy() is used which is not atomic which may lead a wrong value to be read or wrote. This patch fixes this by switching to use virito_{stw|lduw}_phys_cached() to make sure the access is atomic. Fixes: 683f7665679c1 ("virtio: event suppression support for packed ring") Cc: qemu-stable@nongnu.org Signed-off-by: Jason Wang Message-Id: <20211111063854.29060-2-jasowang@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 939bcbfeb9..ea7c079fb0 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -247,13 +247,10 @@ static void vring_packed_event_read(VirtIODevice *vdev, hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap); hwaddr off_flags = offsetof(VRingPackedDescEvent, flags); - address_space_read_cached(cache, off_flags, &e->flags, - sizeof(e->flags)); + e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags); /* Make sure flags is seen before off_wrap */ smp_rmb(); - address_space_read_cached(cache, off_off, &e->off_wrap, - sizeof(e->off_wrap)); - virtio_tswap16s(vdev, &e->off_wrap); + e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off); virtio_tswap16s(vdev, &e->flags); } @@ -263,8 +260,7 @@ static void vring_packed_off_wrap_write(VirtIODevice *vdev, { hwaddr off = offsetof(VRingPackedDescEvent, off_wrap); - virtio_tswap16s(vdev, &off_wrap); - address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap)); + virtio_stw_phys_cached(vdev, cache, off, off_wrap); address_space_cache_invalidate(cache, off, sizeof(off_wrap)); } @@ -273,8 +269,7 @@ static void vring_packed_flags_write(VirtIODevice *vdev, { hwaddr off = offsetof(VRingPackedDescEvent, flags); - virtio_tswap16s(vdev, &flags); - address_space_write_cached(cache, off, &flags, sizeof(flags)); + virtio_stw_phys_cached(vdev, cache, off, flags); address_space_cache_invalidate(cache, off, sizeof(flags)); } From 0351152b6f16e23e65407b4c4b27e23b46aa3801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 12 Nov 2021 20:34:30 +0100 Subject: [PATCH 13/76] vdpa: Replace qemu_open_old by qemu_open at MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no reason to keep using the old one, since we neither use the variadics arguments nor open it with O_DIRECT. Also, net_client_init1, the caller of net_init_vhost_vdpa, wants all net_client_init_fun to use Error API, so it's a good step in that direction. Signed-off-by: Eugenio Pérez Message-Id: <20211112193431.2379298-2-eperezma@redhat.com> Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 373b706b90..1a7250b980 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -261,7 +261,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); opts = &netdev->u.vhost_vdpa; - vdpa_device_fd = qemu_open_old(opts->vhostdev, O_RDWR); + vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); if (vdpa_device_fd == -1) { return -errno; } From c829540401f280f0cad24131f4e7b92e81d506fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 12 Nov 2021 20:34:31 +0100 Subject: [PATCH 14/76] vdpa: Check for existence of opts.vhostdev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since net_init_vhost_vdpa is trying to open it. Not specifying it in the command line crash qemu. Fixes: 7327813d17 ("vhost-vdpa: open device fd in net_init_vhost_vdpa()") Signed-off-by: Eugenio Pérez Message-Id: <20211112193431.2379298-3-eperezma@redhat.com> Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 1a7250b980..2e3c22a8c7 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -260,6 +260,10 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); opts = &netdev->u.vhost_vdpa; + if (!opts->vhostdev) { + error_setg(errp, "vdpa character device not specified with vhostdev"); + return -1; + } vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); if (vdpa_device_fd == -1) { From 23786d13441d36e0efcfee94ba8ff218746bed6c Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:54 +0100 Subject: [PATCH 15/76] pci: implement power state This allows to power off pci devices. In "off" state the devices will not be visible. No pci config space access, no pci bar access, no dma. Default state is "on", so this patch (alone) should not change behavior. Use case: Allows hotplug controllers implement slot power. Hotplug controllers doing so should set the inital power state for devices in the ->plug callback. Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-2-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 25 +++++++++++++++++++++++-- hw/pci/pci_host.c | 6 ++++-- include/hw/pci/pci.h | 2 ++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 4a84e478ce..e5993c1ef5 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1380,6 +1380,9 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); + if (!d->has_power) { + new_addr = PCI_BAR_UNMAPPED; + } /* This bar isn't changed */ if (new_addr == r->addr) @@ -1464,8 +1467,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int if (range_covers_byte(addr, l, PCI_COMMAND)) { pci_update_irq_disabled(d, was_irq_disabled); memory_region_set_enabled(&d->bus_master_enable_region, - pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); } msi_write_config(d, addr, val_in, l); @@ -2182,6 +2185,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_qdev_unrealize(DEVICE(pci_dev)); return; } + + pci_set_power(pci_dev, true); } PCIDevice *pci_new_multifunction(int devfn, bool multifunction, @@ -2853,6 +2858,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) return msg; } +void pci_set_power(PCIDevice *d, bool state) +{ + if (d->has_power == state) { + return; + } + + d->has_power = state; + pci_update_mappings(d); + memory_region_set_enabled(&d->bus_master_enable_region, + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); + if (!d->has_power) { + pci_device_reset(d); + } +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index cf02f0d6a5..7beafd40a8 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -74,7 +74,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power) { return; } @@ -97,7 +98,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power) { return ~0x0; } diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 5c4016b995..e7cdf2d5ec 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -268,6 +268,7 @@ typedef struct PCIReqIDCache PCIReqIDCache; struct PCIDevice { DeviceState qdev; bool partially_hotplugged; + bool has_power; /* PCI config space */ uint8_t *config; @@ -908,5 +909,6 @@ extern const VMStateDescription vmstate_pci_device; } MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); +void pci_set_power(PCIDevice *pci_dev, bool state); #endif From d5daff7d312653b92f23c7a8e198090b32b8dae6 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:55 +0100 Subject: [PATCH 16/76] pcie: implement slot power control for pcie root ports With this patch hot-plugged pci devices will only be visible to the guest if the guests hotplug driver has enabled slot power. This should fix the hot-plug race which one can hit when hot-plugging a pci device at boot, while the guest is in the middle of the pci bus scan. Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-3-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 914a9bf3d1..13d11a57c7 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -366,6 +366,29 @@ static void hotplug_event_clear(PCIDevice *dev) } } +static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + bool *power = opaque; + + pci_set_power(dev, *power); +} + +static void pcie_cap_update_power(PCIDevice *hotplug_dev) +{ + uint8_t *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(hotplug_dev)); + uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); + bool power = true; + + if (sltcap & PCI_EXP_SLTCAP_PCP) { + power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON; + } + + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + pcie_set_power_device, &power); +} + /* * A PCI Express Hot-Plug Event has occurred, so update slot status register * and notify OS of the event if necessary. @@ -434,6 +457,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_DLLLA); } + pcie_cap_update_power(hotplug_pdev); return; } @@ -451,6 +475,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } pcie_cap_slot_event(hotplug_pdev, PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); + pcie_cap_update_power(hotplug_pdev); } } @@ -625,6 +650,7 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_ABP); + pcie_cap_update_power(dev); hotplug_event_update_event_status(dev); } @@ -705,6 +731,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC); } + pcie_cap_update_power(dev); hotplug_event_notify(dev); @@ -731,6 +758,7 @@ int pcie_cap_slot_post_load(void *opaque, int version_id) { PCIDevice *dev = opaque; hotplug_event_update_event_status(dev); + pcie_cap_update_power(dev); return 0; } From 81124b3c7a5dae34881cd8cd9631f125da81ef97 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:56 +0100 Subject: [PATCH 17/76] pcie: add power indicator blink check Refuse to push the attention button in case the guest is busy with some hotplug operation (as indicated by the power indicator blinking). Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-4-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 13d11a57c7..b92dbff118 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -506,6 +506,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); /* Check if hot-unplug is disabled on the slot */ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { @@ -521,6 +522,12 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK) { + error_setg(errp, "Hot-unplug failed: " + "guest is busy (power indicator blinking)"); + return; + } + dev->pending_deleted_event = true; /* In case user cancel the operation of multi-function hot-add, From 44242d4d3d082a28200fdbecaed5398122682e6e Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:57 +0100 Subject: [PATCH 18/76] pcie: factor out pcie_cap_slot_unplug() No functional change. Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-5-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index b92dbff118..959bf074b2 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -497,6 +497,25 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) object_unparent(OBJECT(dev)); } +static void pcie_cap_slot_do_unplug(PCIDevice *dev) +{ + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); + + pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL); + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || + (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDC); +} + void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -676,7 +695,6 @@ void pcie_cap_slot_write_config(PCIDevice *dev, uint32_t pos = dev->exp.exp_cap; uint8_t *exp_cap = dev->config + pos; uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); - uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { /* @@ -726,17 +744,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { - PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); - pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL); - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); - if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || - (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); - } - pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDC); + pcie_cap_slot_do_unplug(dev); } pcie_cap_update_power(dev); From 0d33415a4eafe532f3beef8272811d927236a353 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:58 +0100 Subject: [PATCH 19/76] pcie: fast unplug when slot power is off In case the slot is powered off (and the power indicator turned off too) we can unplug right away, without round-trip to the guest. Also clear pending attention button press, there is nothing to care about any more. Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-6-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 959bf074b2..a930ac738a 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -560,6 +560,16 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if (((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) && + ((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF)) { + /* slot is powered off -> unplug without round-trip to the guest */ + pcie_cap_slot_do_unplug(hotplug_pdev); + hotplug_event_notify(hotplug_pdev); + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_ABP); + return; + } + pcie_cap_slot_push_attention_button(hotplug_pdev); } From 18416c62e36a79823a9e28f6b2260aa13c25e1d9 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 11 Nov 2021 14:08:59 +0100 Subject: [PATCH 20/76] pcie: expire pending delete Add an expire time for pending delete, once the time is over allow pressing the attention button again. This makes pcie hotplug behave more like acpi hotplug, where one can try sending an 'device_del' monitor command again in case the guest didn't respond to the first attempt. Signed-off-by: Gerd Hoffmann Message-Id: <20211111130859.1171890-7-kraxel@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 2 ++ include/hw/qdev-core.h | 1 + softmmu/qdev-monitor.c | 4 +++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index a930ac738a..c5ed266337 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -548,6 +548,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, } dev->pending_deleted_event = true; + dev->pending_deleted_expires_ms = + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */ /* In case user cancel the operation of multi-function hot-add, * remove the function that is unexposed to guest individually, diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 72622bd337..20d3066595 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -181,6 +181,7 @@ struct DeviceState { char *canonical_path; bool realized; bool pending_deleted_event; + int64_t pending_deleted_expires_ms; QDict *opts; int hotplugged; bool allow_unplug_during_migration; diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index 588a62b88d..5925f1ae5f 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -943,7 +943,9 @@ void qmp_device_del(const char *id, Error **errp) { DeviceState *dev = find_device_state(id, errp); if (dev != NULL) { - if (dev->pending_deleted_event) { + if (dev->pending_deleted_event && + (dev->pending_deleted_expires_ms == 0 || + dev->pending_deleted_expires_ms > qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) { error_setg(errp, "Device %s is already in the " "process of unplug", id); return; From 01b5ab8cc08ab0afb6574f46a4d966725a00a1de Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 30 Sep 2021 16:08:40 +0100 Subject: [PATCH 21/76] hw/intc/arm_gicv3: Move checking of redist-region-count to arm_gicv3_common_realize The GICv3 devices have an array property redist-region-count. Currently we check this for errors (bad values) in gicv3_init_irqs_and_mmio(), just before we use it. Move this error checking to the arm_gicv3_common_realize() function, where we sanity-check all of the other base-class properties. (This will always be before gicv3_init_irqs_and_mmio() is called, because that function is called in the subclass realize methods, after they have called the parent-class realize.) The motivation for this refactor is: * we would like to use the redist_region_count[] values in arm_gicv3_common_realize() in a subsequent patch, so we need to have already done the sanity-checking first * this removes the only use of the Error** argument to gicv3_init_irqs_and_mmio(), so we can remove some error-handling boilerplate Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- hw/intc/arm_gicv3.c | 6 +----- hw/intc/arm_gicv3_common.c | 26 +++++++++++++------------- hw/intc/arm_gicv3_kvm.c | 6 +----- include/hw/intc/arm_gicv3_common.h | 2 +- 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c index 3f24707838..bcf54a5f0a 100644 --- a/hw/intc/arm_gicv3.c +++ b/hw/intc/arm_gicv3.c @@ -393,11 +393,7 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) return; } - gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops); gicv3_init_cpuif(s); } diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 223db16fec..8e47809398 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -250,22 +250,11 @@ static const VMStateDescription vmstate_gicv3 = { }; void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, - const MemoryRegionOps *ops, Error **errp) + const MemoryRegionOps *ops) { SysBusDevice *sbd = SYS_BUS_DEVICE(s); - int rdist_capacity = 0; int i; - for (i = 0; i < s->nb_redist_regions; i++) { - rdist_capacity += s->redist_region_count[i]; - } - if (rdist_capacity < s->num_cpu) { - error_setg(errp, "Capacity of the redist regions(%d) " - "is less than number of vcpus(%d)", - rdist_capacity, s->num_cpu); - return; - } - /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU. * GPIO array layout is thus: * [0..N-1] spi @@ -308,7 +297,7 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) { GICv3State *s = ARM_GICV3_COMMON(dev); - int i; + int i, rdist_capacity; /* revision property is actually reserved and currently used only in order * to keep the interface compatible with GICv2 code, avoiding extra @@ -350,6 +339,17 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) return; } + rdist_capacity = 0; + for (i = 0; i < s->nb_redist_regions; i++) { + rdist_capacity += s->redist_region_count[i]; + } + if (rdist_capacity < s->num_cpu) { + error_setg(errp, "Capacity of the redist regions(%d) " + "is less than number of vcpus(%d)", + rdist_capacity, s->num_cpu); + return; + } + s->cpu = g_new0(GICv3CPUState, s->num_cpu); for (i = 0; i < s->num_cpu; i++) { diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 5c09f00dec..ab58c73306 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -787,11 +787,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) return; } - gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); for (i = 0; i < s->num_cpu; i++) { ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h index aa4f0d6770..cb2b0d0ad4 100644 --- a/include/hw/intc/arm_gicv3_common.h +++ b/include/hw/intc/arm_gicv3_common.h @@ -306,6 +306,6 @@ struct ARMGICv3CommonClass { }; void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, - const MemoryRegionOps *ops, Error **errp); + const MemoryRegionOps *ops); #endif From 046164155abe7594ad1d8729dbe150e95badeaed Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 30 Sep 2021 16:08:41 +0100 Subject: [PATCH 22/76] hw/intc/arm_gicv3: Set GICR_TYPER.Last correctly when nb_redist_regions > 1 The 'Last' bit in the GICR_TYPER GICv3 redistributor register is supposed to be set to 1 if this is the last redistributor in a series of contiguous redistributor pages. Currently we set Last only for the redistributor for CPU (num_cpu - 1). This only works if there is a single redistributor region; if there are multiple redistributor regions then we need to set the Last bit for the last redistributor in each region. This doesn't cause any problems currently because only the KVM GICv3 supports multiple redistributor regions, and it ignores the value in GICv3State::gicr_typer. But we need to fix this before we can enable support for multiple regions in the emulated GICv3. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- hw/intc/arm_gicv3_common.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 8e47809398..8de9205b38 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -297,7 +297,7 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) { GICv3State *s = ARM_GICV3_COMMON(dev); - int i, rdist_capacity; + int i, rdist_capacity, cpuidx; /* revision property is actually reserved and currently used only in order * to keep the interface compatible with GICv2 code, avoiding extra @@ -355,7 +355,6 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) for (i = 0; i < s->num_cpu; i++) { CPUState *cpu = qemu_get_cpu(i); uint64_t cpu_affid; - int last; s->cpu[i].cpu = cpu; s->cpu[i].gic = s; @@ -375,7 +374,6 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) * PLPIS == 0 (physical LPIs not supported) */ cpu_affid = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); - last = (i == s->num_cpu - 1); /* The CPU mp-affinity property is in MPIDR register format; squash * the affinity bytes into 32 bits as the GICR_TYPER has them. @@ -384,13 +382,22 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) (cpu_affid & 0xFFFFFF); s->cpu[i].gicr_typer = (cpu_affid << 32) | (1 << 24) | - (i << 8) | - (last << 4); + (i << 8); if (s->lpi_enable) { s->cpu[i].gicr_typer |= GICR_TYPER_PLPIS; } } + + /* + * Now go through and set GICR_TYPER.Last for the final + * redistributor in each region. + */ + cpuidx = 0; + for (i = 0; i < s->nb_redist_regions; i++) { + cpuidx += s->redist_region_count[i]; + s->cpu[cpuidx - 1].gicr_typer |= GICR_TYPER_LAST; + } } static void arm_gicv3_finalize(Object *obj) From e5cba10ee16566a479cebec3890e91df91e33ab5 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 30 Sep 2021 16:08:42 +0100 Subject: [PATCH 23/76] hw/intc/arm_gicv3: Support multiple redistributor regions Our GICv3 QOM interface includes an array property redist-region-count which allows board models to specify that the registributor registers are not in a single contiguous range, but split into multiple pieces. We implemented this for KVM, but currently the TCG GICv3 model insists that there is only one region. You can see the limit being hit with a setup like: qemu-system-aarch64 -machine virt,gic-version=3 -smp 124 Add support for split regions to the TCG GICv3. To do this we switch from allocating a simple array of MemoryRegions to an array of GICv3RedistRegion structs so that we can use the GICv3RedistRegion as the opaque pointer in the MemoryRegion read/write callbacks. Each GICv3RedistRegion contains the MemoryRegion, a backpointer allowing the read/write callback to get hold of the GICv3State, and an index which allows us to calculate which CPU's redistributor is being accessed. Note that arm_gicv3_kvm always passes in NULL as the ops argument to gicv3_init_irqs_and_mmio(), so the only MemoryRegion read/write callbacks we need to update to handle this new scheme are the gicv3_redist_read/write functions used by the emulated GICv3. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- hw/intc/arm_gicv3.c | 6 ----- hw/intc/arm_gicv3_common.c | 15 ++++++++--- hw/intc/arm_gicv3_kvm.c | 4 +-- hw/intc/arm_gicv3_redist.c | 40 ++++++++++++++++-------------- include/hw/intc/arm_gicv3_common.h | 12 ++++++++- 5 files changed, 46 insertions(+), 31 deletions(-) diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c index bcf54a5f0a..c6282984b1 100644 --- a/hw/intc/arm_gicv3.c +++ b/hw/intc/arm_gicv3.c @@ -387,12 +387,6 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) return; } - if (s->nb_redist_regions != 1) { - error_setg(errp, "VGICv3 redist region number(%d) not equal to 1", - s->nb_redist_regions); - return; - } - gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops); gicv3_init_cpuif(s); diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 8de9205b38..9884d2e39b 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -254,6 +254,7 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, { SysBusDevice *sbd = SYS_BUS_DEVICE(s); int i; + int cpuidx; /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU. * GPIO array layout is thus: @@ -282,14 +283,20 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, "gicv3_dist", 0x10000); sysbus_init_mmio(sbd, &s->iomem_dist); - s->iomem_redist = g_new0(MemoryRegion, s->nb_redist_regions); + s->redist_regions = g_new0(GICv3RedistRegion, s->nb_redist_regions); + cpuidx = 0; for (i = 0; i < s->nb_redist_regions; i++) { char *name = g_strdup_printf("gicv3_redist_region[%d]", i); + GICv3RedistRegion *region = &s->redist_regions[i]; - memory_region_init_io(&s->iomem_redist[i], OBJECT(s), - ops ? &ops[1] : NULL, s, name, + region->gic = s; + region->cpuidx = cpuidx; + cpuidx += s->redist_region_count[i]; + + memory_region_init_io(®ion->iomem, OBJECT(s), + ops ? &ops[1] : NULL, region, name, s->redist_region_count[i] * GICV3_REDIST_SIZE); - sysbus_init_mmio(sbd, &s->iomem_redist[i]); + sysbus_init_mmio(sbd, ®ion->iomem); g_free(name); } } diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index ab58c73306..5ec5ff9ef6 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -825,7 +825,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd, 0); if (!multiple_redist_region_allowed) { - kvm_arm_register_device(&s->iomem_redist[0], -1, + kvm_arm_register_device(&s->redist_regions[0].iomem, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0); } else { @@ -838,7 +838,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) uint64_t addr_ormask = i | ((uint64_t)s->redist_region_count[i] << 52); - kvm_arm_register_device(&s->iomem_redist[i], -1, + kvm_arm_register_device(&s->redist_regions[i].iomem, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, s->dev_fd, addr_ormask); diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c index 7072bfcbb1..424e7e28a8 100644 --- a/hw/intc/arm_gicv3_redist.c +++ b/hw/intc/arm_gicv3_redist.c @@ -425,22 +425,24 @@ static MemTxResult gicr_writell(GICv3CPUState *cs, hwaddr offset, MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, unsigned size, MemTxAttrs attrs) { - GICv3State *s = opaque; + GICv3RedistRegion *region = opaque; + GICv3State *s = region->gic; GICv3CPUState *cs; MemTxResult r; int cpuidx; assert((offset & (size - 1)) == 0); - /* This region covers all the redistributor pages; there are - * (for GICv3) two 64K pages per CPU. At the moment they are - * all contiguous (ie in this one region), though we might later - * want to allow splitting of redistributor pages into several - * blocks so we can support more CPUs. + /* + * There are (for GICv3) two 64K redistributor pages per CPU. + * In some cases the redistributor pages for all CPUs are not + * contiguous (eg on the virt board they are split into two + * parts if there are too many CPUs to all fit in the same place + * in the memory map); if so then the GIC has multiple MemoryRegions + * for the redistributors. */ - cpuidx = offset / 0x20000; - offset %= 0x20000; - assert(cpuidx < s->num_cpu); + cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE; + offset %= GICV3_REDIST_SIZE; cs = &s->cpu[cpuidx]; @@ -482,22 +484,24 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, unsigned size, MemTxAttrs attrs) { - GICv3State *s = opaque; + GICv3RedistRegion *region = opaque; + GICv3State *s = region->gic; GICv3CPUState *cs; MemTxResult r; int cpuidx; assert((offset & (size - 1)) == 0); - /* This region covers all the redistributor pages; there are - * (for GICv3) two 64K pages per CPU. At the moment they are - * all contiguous (ie in this one region), though we might later - * want to allow splitting of redistributor pages into several - * blocks so we can support more CPUs. + /* + * There are (for GICv3) two 64K redistributor pages per CPU. + * In some cases the redistributor pages for all CPUs are not + * contiguous (eg on the virt board they are split into two + * parts if there are too many CPUs to all fit in the same place + * in the memory map); if so then the GIC has multiple MemoryRegions + * for the redistributors. */ - cpuidx = offset / 0x20000; - offset %= 0x20000; - assert(cpuidx < s->num_cpu); + cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE; + offset %= GICV3_REDIST_SIZE; cs = &s->cpu[cpuidx]; diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h index cb2b0d0ad4..fc38e4b7dc 100644 --- a/include/hw/intc/arm_gicv3_common.h +++ b/include/hw/intc/arm_gicv3_common.h @@ -215,13 +215,23 @@ struct GICv3CPUState { bool seenbetter; }; +/* + * The redistributor pages might be split into more than one region + * on some machine types if there are many CPUs. + */ +typedef struct GICv3RedistRegion { + GICv3State *gic; + MemoryRegion iomem; + uint32_t cpuidx; /* index of first CPU this region covers */ +} GICv3RedistRegion; + struct GICv3State { /*< private >*/ SysBusDevice parent_obj; /*< public >*/ MemoryRegion iomem_dist; /* Distributor */ - MemoryRegion *iomem_redist; /* Redistributor Regions */ + GICv3RedistRegion *redist_regions; /* Redistributor Regions */ uint32_t *redist_region_count; /* redistributor count within each region */ uint32_t nb_redist_regions; /* number of redist regions */ From 1adf528ec3bdf62ea3b580b7ad562534a3676ff5 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 20 Sep 2021 14:25:35 +0200 Subject: [PATCH 24/76] hw/rtc/pl031: Send RTC_CHANGE QMP event The PL031 currently is not able to report guest RTC change to the QMP monitor as opposed to mc146818 or spapr RTCs. This patch adds the call to qapi_event_send_rtc_change() when the Load Register is written. The value which is reported corresponds to the difference between the guest reference time and the reference time kept in softmmu/rtc.c. For instance adding 20s to the guest RTC value will report 20. Adding an extra 20s to the guest RTC value will report 20 + 20 = 40. The inclusion of qapi/qapi-types-misc-target.h in hw/rtl/pl031.c require to compile the PL031 with specific_ss.add() to avoid ./qapi/qapi-types-misc-target.h:18:13: error: attempt to use poisoned "TARGET_". Signed-off-by: Eric Auger Reviewed-by: Peter Maydell Message-id: 20210920122535.269988-1-eric.auger@redhat.com Signed-off-by: Peter Maydell --- hw/rtc/meson.build | 2 +- hw/rtc/pl031.c | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/rtc/meson.build b/hw/rtc/meson.build index 7cecdee5dd..8fd8d8f9a7 100644 --- a/hw/rtc/meson.build +++ b/hw/rtc/meson.build @@ -2,7 +2,7 @@ softmmu_ss.add(when: 'CONFIG_DS1338', if_true: files('ds1338.c')) softmmu_ss.add(when: 'CONFIG_M41T80', if_true: files('m41t80.c')) softmmu_ss.add(when: 'CONFIG_M48T59', if_true: files('m48t59.c')) -softmmu_ss.add(when: 'CONFIG_PL031', if_true: files('pl031.c')) +specific_ss.add(when: 'CONFIG_PL031', if_true: files('pl031.c')) softmmu_ss.add(when: 'CONFIG_TWL92230', if_true: files('twl92230.c')) softmmu_ss.add(when: ['CONFIG_ISA_BUS', 'CONFIG_M48T59'], if_true: files('m48t59-isa.c')) softmmu_ss.add(when: 'CONFIG_XLNX_ZYNQMP', if_true: files('xlnx-zynqmp-rtc.c')) diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c index 2bbb2062ac..e7ced90b02 100644 --- a/hw/rtc/pl031.c +++ b/hw/rtc/pl031.c @@ -24,6 +24,7 @@ #include "qemu/log.h" #include "qemu/module.h" #include "trace.h" +#include "qapi/qapi-events-misc-target.h" #define RTC_DR 0x00 /* Data read register */ #define RTC_MR 0x04 /* Match register */ @@ -136,10 +137,17 @@ static void pl031_write(void * opaque, hwaddr offset, trace_pl031_write(offset, value); switch (offset) { - case RTC_LR: + case RTC_LR: { + struct tm tm; + s->tick_offset += value - pl031_get_count(s); + + qemu_get_timedate(&tm, s->tick_offset); + qapi_event_send_rtc_change(qemu_timedate_diff(&tm)); + pl031_set_alarm(s); break; + } case RTC_MR: s->mr = value; pl031_set_alarm(s); From 2523a7956519cc13c92b9214d188cd2489d4df2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Thu, 11 Nov 2021 10:17:16 +0100 Subject: [PATCH 25/76] tests/unit/test-smp-parse: Restore MachineClass fields after modifying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a single MachineClass object, registered with type_register_static(&smp_machine_info). Since the same object is used multiple times (an MachineState object is instantiated in both test_generic and test_with_dies), we should restore its internal state after modifying for the test purpose. Reviewed-by: Andrew Jones Reviewed-by: Richard Henderson Reviewed-by: Yanan Wang Tested-by: Yanan Wang Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20211115145900.2531865-2-philmd@redhat.com> --- tests/unit/test-smp-parse.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c index cbe0c99049..47774c1ee2 100644 --- a/tests/unit/test-smp-parse.c +++ b/tests/unit/test-smp-parse.c @@ -512,7 +512,7 @@ static void test_generic(void) smp_parse_test(ms, data, true); } - /* Reset the supported min CPUs and max CPUs */ + /* Force invalid min CPUs and max CPUs */ mc->min_cpus = 2; mc->max_cpus = 511; @@ -523,6 +523,10 @@ static void test_generic(void) smp_parse_test(ms, data, false); } + /* Reset the supported min CPUs and max CPUs */ + mc->min_cpus = MIN_CPUS; + mc->max_cpus = MAX_CPUS; + object_unref(obj); } @@ -536,6 +540,7 @@ static void test_with_dies(void) int i; smp_machine_class_init(mc); + /* Force the SMP compat properties */ mc->smp_props.dies_supported = true; for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) { @@ -575,6 +580,9 @@ static void test_with_dies(void) smp_parse_test(ms, data, false); } + /* Restore the SMP compat properties */ + mc->smp_props.dies_supported = false; + object_unref(obj); } From c3440eff4c0b6ec7adf391620393cb9755eab8d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Thu, 11 Nov 2021 09:20:22 +0100 Subject: [PATCH 26/76] tests/unit/test-smp-parse: QOM'ify smp_machine_class_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit smp_machine_class_init() is the actual TypeInfo::class_init(). Declare it as such in smp_machine_info, and avoid to call it manually in each test. Move smp_machine_info definition just before we register the type to avoid a forward declaration. Reviewed-by: Andrew Jones Reviewed-by: Richard Henderson Reviewed-by: Yanan Wang Tested-by: Yanan Wang Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20211115145900.2531865-3-philmd@redhat.com> --- tests/unit/test-smp-parse.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c index 47774c1ee2..3fff2af4e2 100644 --- a/tests/unit/test-smp-parse.c +++ b/tests/unit/test-smp-parse.c @@ -75,14 +75,6 @@ typedef struct SMPTestData { const char *expect_error; } SMPTestData; -/* Type info of the tested machine */ -static const TypeInfo smp_machine_info = { - .name = TYPE_MACHINE, - .parent = TYPE_OBJECT, - .class_size = sizeof(MachineClass), - .instance_size = sizeof(MachineState), -}; - /* * List all the possible valid sub-collections of the generic 5 * topology parameters (i.e. cpus/maxcpus/sockets/cores/threads), @@ -480,9 +472,10 @@ static void unsupported_params_init(MachineClass *mc, SMPTestData *data) } } -/* Reset the related machine properties before each sub-test */ -static void smp_machine_class_init(MachineClass *mc) +static void machine_base_class_init(ObjectClass *oc, void *data) { + MachineClass *mc = MACHINE_CLASS(oc); + mc->min_cpus = MIN_CPUS; mc->max_cpus = MAX_CPUS; @@ -498,8 +491,6 @@ static void test_generic(void) SMPTestData *data = &(SMPTestData){{ }}; int i; - smp_machine_class_init(mc); - for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) { *data = data_generic_valid[i]; unsupported_params_init(mc, data); @@ -539,7 +530,6 @@ static void test_with_dies(void) unsigned int num_dies = 2; int i; - smp_machine_class_init(mc); /* Force the SMP compat properties */ mc->smp_props.dies_supported = true; @@ -586,12 +576,24 @@ static void test_with_dies(void) object_unref(obj); } +/* Type info of the tested machine */ +static const TypeInfo smp_machine_types[] = { + { + .name = TYPE_MACHINE, + .parent = TYPE_OBJECT, + .class_init = machine_base_class_init, + .class_size = sizeof(MachineClass), + .instance_size = sizeof(MachineState), + } +}; + +DEFINE_TYPES(smp_machine_types) + int main(int argc, char *argv[]) { - g_test_init(&argc, &argv, NULL); - module_call_init(MODULE_INIT_QOM); - type_register_static(&smp_machine_info); + + g_test_init(&argc, &argv, NULL); g_test_add_func("/test-smp-parse/generic", test_generic); g_test_add_func("/test-smp-parse/with_dies", test_with_dies); From 7b6d1bc9629f3dd45647ec3418e0606a92dddd48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Thu, 11 Nov 2021 10:27:45 +0100 Subject: [PATCH 27/76] tests/unit/test-smp-parse: Explicit MachineClass name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the MachineClass::name pointer is not explicitly set, it is NULL. Per the C standard, passing a NULL pointer to printf "%s" format is undefined. Some implementations display it as 'NULL', other as 'null'. Since we are comparing the formatted output, we need a stable value. The easiest is to explicit a machine name string. Reviewed-by: Andrew Jones Reviewed-by: Richard Henderson Reviewed-by: Yanan Wang Tested-by: Yanan Wang Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20211115145900.2531865-4-philmd@redhat.com> --- tests/unit/test-smp-parse.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c index 3fff2af4e2..b02450e25a 100644 --- a/tests/unit/test-smp-parse.c +++ b/tests/unit/test-smp-parse.c @@ -23,6 +23,8 @@ #define MIN_CPUS 1 /* set the min CPUs supported by the machine as 1 */ #define MAX_CPUS 512 /* set the max CPUs supported by the machine as 512 */ +#define SMP_MACHINE_NAME "TEST-SMP" + /* * Used to define the generic 3-level CPU topology hierarchy * -sockets/cores/threads @@ -307,13 +309,13 @@ static struct SMPTestData data_generic_invalid[] = { * should tweak the supported min CPUs to 2 for testing */ .config = SMP_CONFIG_GENERIC(T, 1, F, 0, F, 0, F, 0, F, 0), .expect_error = "Invalid SMP CPUs 1. The min CPUs supported " - "by machine '(null)' is 2", + "by machine '" SMP_MACHINE_NAME "' is 2", }, { /* config: -smp 512 * should tweak the supported max CPUs to 511 for testing */ .config = SMP_CONFIG_GENERIC(T, 512, F, 0, F, 0, F, 0, F, 0), .expect_error = "Invalid SMP CPUs 512. The max CPUs supported " - "by machine '(null)' is 511", + "by machine '" SMP_MACHINE_NAME "' is 511", }, }; @@ -481,6 +483,8 @@ static void machine_base_class_init(ObjectClass *oc, void *data) mc->smp_props.prefer_sockets = true; mc->smp_props.dies_supported = false; + + mc->name = g_strdup(SMP_MACHINE_NAME); } static void test_generic(void) From 8d3dd037d947ce96c42c376ef0bb5e5c6ef96cbe Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:53:57 +0100 Subject: [PATCH 28/76] stream: Traverse graph after modification bdrv_cor_filter_drop() modifies the block graph. That means that other parties can also modify the block graph before it returns. Therefore, we cannot assume that the result of a graph traversal we did before remains valid afterwards. We should thus fetch `base` and `unfiltered_base` afterwards instead of before. Signed-off-by: Hanna Reitz Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20211111120829.81329-2-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-2-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block/stream.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/stream.c b/block/stream.c index 97bee482dc..e45113aed6 100644 --- a/block/stream.c +++ b/block/stream.c @@ -54,8 +54,8 @@ static int stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs); - BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base); - BlockDriverState *unfiltered_base = bdrv_skip_filters(base); + BlockDriverState *base; + BlockDriverState *unfiltered_base; Error *local_err = NULL; int ret = 0; @@ -63,6 +63,9 @@ static int stream_prepare(Job *job) bdrv_cor_filter_drop(s->cor_filter_bs); s->cor_filter_bs = NULL; + base = bdrv_filter_or_cow_bs(s->above_base); + unfiltered_base = bdrv_skip_filters(base); + if (bdrv_cow_child(unfiltered_bs)) { const char *base_id = NULL, *base_fmt = NULL; if (unfiltered_base) { From a225369bce684c01095be52e1014e3fa91dec210 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:53:58 +0100 Subject: [PATCH 29/76] block: Manipulate children list in .attach/.detach The children list is specific to BDS parents. We should not modify it in the general children modification code, but let BDS parents deal with it in their .attach() and .detach() methods. This also has the advantage that a BdrvChild is removed from the children list before its .bs pointer can become NULL. BDS parents generally assume that their children's .bs pointer is never NULL, so this is actually a bug fix. Signed-off-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20211111120829.81329-3-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-3-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/block.c b/block.c index 580cb77a70..ca024ffced 100644 --- a/block.c +++ b/block.c @@ -1387,6 +1387,8 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; + QLIST_INSERT_HEAD(&bs->children, child, next); + if (child->role & BDRV_CHILD_COW) { bdrv_backing_attach(child); } @@ -1403,6 +1405,8 @@ static void bdrv_child_cb_detach(BdrvChild *child) } bdrv_unapply_subtree_drain(child, bs); + + QLIST_REMOVE(child, next); } static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, @@ -2747,7 +2751,7 @@ static void bdrv_child_free(void *opaque) static void bdrv_remove_empty_child(BdrvChild *child) { assert(!child->bs); - QLIST_SAFE_REMOVE(child, next); + assert(!child->next.le_prev); /* not in children list */ bdrv_child_free(child); } @@ -2913,12 +2917,6 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, return ret; } - QLIST_INSERT_HEAD(&parent_bs->children, *child, next); - /* - * child is removed in bdrv_attach_child_common_abort(), so don't care to - * abort this change separately. - */ - return 0; } @@ -4851,7 +4849,6 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque) BdrvRemoveFilterOrCowChild *s = opaque; BlockDriverState *parent_bs = s->child->opaque; - QLIST_INSERT_HEAD(&parent_bs->children, s->child, next); if (s->is_backing) { parent_bs->backing = s->child; } else { @@ -4906,7 +4903,6 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); - QLIST_SAFE_REMOVE(child, next); if (s->is_backing) { bs->backing = NULL; } else { From 04c9c3a52c21088e7039956e5a635250fe3eaee6 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:53:59 +0100 Subject: [PATCH 30/76] block: Unite remove_empty_child and child_free Now that bdrv_remove_empty_child() no longer removes the child from the parent's children list but only checks that it is not in such a list, it is only a wrapper around bdrv_child_free() that checks that the child is empty and unused. That should apply to all children that we free, so put those checks into bdrv_child_free() and drop bdrv_remove_empty_child(). Signed-off-by: Hanna Reitz Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20211111120829.81329-4-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-4-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/block.c b/block.c index ca024ffced..19bff4f95c 100644 --- a/block.c +++ b/block.c @@ -2740,19 +2740,19 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } } -static void bdrv_child_free(void *opaque) -{ - BdrvChild *c = opaque; - - g_free(c->name); - g_free(c); -} - -static void bdrv_remove_empty_child(BdrvChild *child) +/** + * Free the given @child. + * + * The child must be empty (i.e. `child->bs == NULL`) and it must be + * unused (i.e. not in a children list). + */ +static void bdrv_child_free(BdrvChild *child) { assert(!child->bs); assert(!child->next.le_prev); /* not in children list */ - bdrv_child_free(child); + + g_free(child->name); + g_free(child); } typedef struct BdrvAttachChildCommonState { @@ -2786,7 +2786,7 @@ static void bdrv_attach_child_common_abort(void *opaque) } bdrv_unref(bs); - bdrv_remove_empty_child(child); + bdrv_child_free(child); *s->child = NULL; } @@ -2859,7 +2859,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, if (ret < 0) { error_propagate(errp, local_err); - bdrv_remove_empty_child(new_child); + bdrv_child_free(new_child); return ret; } } @@ -2925,7 +2925,7 @@ static void bdrv_detach_child(BdrvChild *child) BlockDriverState *old_bs = child->bs; bdrv_replace_child_noperm(child, NULL); - bdrv_remove_empty_child(child); + bdrv_child_free(child); if (old_bs) { /* From 265180614189b969058f5e507c8964fc7dfd422f Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:00 +0100 Subject: [PATCH 31/76] block: Drop detached child from ignore list bdrv_attach_child_common_abort() restores the parent's AioContext. To do so, the child (which was supposed to be attached, but is now detached again by this abort handler) is added to the ignore list for the AioContext changing functions. However, since we modify a BDS's children list in the BdrvChildClass's .attach and .detach handlers, the child is already effectively detached from the parent by this point. We do not need to put it into the ignore list. Use this opportunity to clean up the empty line structure: Keep setting the ignore list, invoking the AioContext function, and freeing the ignore list in blocks separated by empty lines. Signed-off-by: Hanna Reitz Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20211111120829.81329-5-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-5-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/block.c b/block.c index 19bff4f95c..c7d5aa5254 100644 --- a/block.c +++ b/block.c @@ -2774,14 +2774,16 @@ static void bdrv_attach_child_common_abort(void *opaque) } if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) { - GSList *ignore = g_slist_prepend(NULL, child); + GSList *ignore; + /* No need to ignore `child`, because it has been detached already */ + ignore = NULL; child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore, &error_abort); g_slist_free(ignore); - ignore = g_slist_prepend(NULL, child); - child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); + ignore = NULL; + child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); g_slist_free(ignore); } From be64bbb0149748f3999c49b13976aafb8330ea86 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:01 +0100 Subject: [PATCH 32/76] block: Pass BdrvChild ** to replace_child_noperm bdrv_replace_child_noperm() modifies BdrvChild.bs, and can potentially set it to NULL. That is dangerous, because BDS parents generally assume that their children's .bs pointer is never NULL. We therefore want to let bdrv_replace_child_noperm() set the corresponding BdrvChild pointer to NULL, too. This patch lays the foundation for it by passing a BdrvChild ** pointer to bdrv_replace_child_noperm() so that it can later use it to NULL the BdrvChild pointer immediately after setting BdrvChild.bs to NULL. (We will still need to undertake some intermediate steps, though.) Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-6-hreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-6-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/block.c b/block.c index c7d5aa5254..d668156eca 100644 --- a/block.c +++ b/block.c @@ -87,7 +87,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static bool bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child); -static void bdrv_replace_child_noperm(BdrvChild *child, +static void bdrv_replace_child_noperm(BdrvChild **child, BlockDriverState *new_bs); static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, @@ -2270,7 +2270,7 @@ static void bdrv_replace_child_abort(void *opaque) BlockDriverState *new_bs = s->child->bs; /* old_bs reference is transparently moved from @s to @s->child */ - bdrv_replace_child_noperm(s->child, s->old_bs); + bdrv_replace_child_noperm(&s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2300,7 +2300,7 @@ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, if (new_bs) { bdrv_ref(new_bs); } - bdrv_replace_child_noperm(child, new_bs); + bdrv_replace_child_noperm(&child, new_bs); /* old_bs reference is transparently moved from @child to @s */ } @@ -2672,9 +2672,10 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } -static void bdrv_replace_child_noperm(BdrvChild *child, +static void bdrv_replace_child_noperm(BdrvChild **childp, BlockDriverState *new_bs) { + BdrvChild *child = *childp; BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; int drain_saldo; @@ -2767,7 +2768,7 @@ static void bdrv_attach_child_common_abort(void *opaque) BdrvChild *child = *s->child; BlockDriverState *bs = child->bs; - bdrv_replace_child_noperm(child, NULL); + bdrv_replace_child_noperm(s->child, NULL); if (bdrv_get_aio_context(bs) != s->old_child_ctx) { bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); @@ -2867,7 +2868,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, } bdrv_ref(child_bs); - bdrv_replace_child_noperm(new_child, child_bs); + bdrv_replace_child_noperm(&new_child, child_bs); *child = new_child; @@ -2922,12 +2923,12 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, return 0; } -static void bdrv_detach_child(BdrvChild *child) +static void bdrv_detach_child(BdrvChild **childp) { - BlockDriverState *old_bs = child->bs; + BlockDriverState *old_bs = (*childp)->bs; - bdrv_replace_child_noperm(child, NULL); - bdrv_child_free(child); + bdrv_replace_child_noperm(childp, NULL); + bdrv_child_free(*childp); if (old_bs) { /* @@ -3033,7 +3034,7 @@ void bdrv_root_unref_child(BdrvChild *child) BlockDriverState *child_bs; child_bs = child->bs; - bdrv_detach_child(child); + bdrv_detach_child(&child); bdrv_unref(child_bs); } From 562bda8bb41879eeda0bd484dd3d55134579b28e Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:02 +0100 Subject: [PATCH 33/76] block: Restructure remove_file_or_backing_child() As of a future patch, bdrv_replace_child_tran() will take a BdrvChild ** pointer. Prepare for that by getting such a pointer and using it where applicable, and (dereferenced) as a parameter for bdrv_replace_child_tran(). Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-7-hreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-7-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/block.c b/block.c index d668156eca..8da057f800 100644 --- a/block.c +++ b/block.c @@ -4887,30 +4887,33 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, Transaction *tran) { + BdrvChild **childp; BdrvRemoveFilterOrCowChild *s; - assert(child == bs->backing || child == bs->file); - if (!child) { return; } + if (child == bs->backing) { + childp = &bs->backing; + } else if (child == bs->file) { + childp = &bs->file; + } else { + g_assert_not_reached(); + } + if (child->bs) { - bdrv_replace_child_tran(child, NULL, tran); + bdrv_replace_child_tran(*childp, NULL, tran); } s = g_new(BdrvRemoveFilterOrCowChild, 1); *s = (BdrvRemoveFilterOrCowChild) { .child = child, - .is_backing = (child == bs->backing), + .is_backing = (childp == &bs->backing), }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); - if (s->is_backing) { - bs->backing = NULL; - } else { - bs->file = NULL; - } + *childp = NULL; } /* From 079bff693bc47bf69fb131f87a03c1689e48ed55 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:03 +0100 Subject: [PATCH 34/76] transactions: Invoke clean() after everything else Invoke the transaction drivers' .clean() methods only after all .commit() or .abort() handlers are done. This makes it easier to have nested transactions where the top-level transactions pass objects to lower transactions that the latter can still use throughout their commit/abort phases, while the top-level transaction keeps a reference that is released in its .clean() method. (Before this commit, that is also possible, but the top-level transaction would need to take care to invoke tran_add() before the lower-level transaction does. This commit makes the ordering irrelevant, which is just a bit nicer.) Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-8-hreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-8-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- include/qemu/transactions.h | 3 +++ util/transactions.c | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h index 92c5965235..2f2060acd9 100644 --- a/include/qemu/transactions.h +++ b/include/qemu/transactions.h @@ -31,6 +31,9 @@ * tran_create(), call your "prepare" functions on it, and finally call * tran_abort() or tran_commit() to finalize the transaction by corresponding * finalization actions in reverse order. + * + * The clean() functions registered by the drivers in a transaction are called + * last, after all abort() or commit() functions have been called. */ #ifndef QEMU_TRANSACTIONS_H diff --git a/util/transactions.c b/util/transactions.c index d0bc9a3e73..2dbdedce95 100644 --- a/util/transactions.c +++ b/util/transactions.c @@ -61,11 +61,13 @@ void tran_abort(Transaction *tran) { TransactionAction *act, *next; - QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { + QSLIST_FOREACH(act, &tran->actions, entry) { if (act->drv->abort) { act->drv->abort(act->opaque); } + } + QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { if (act->drv->clean) { act->drv->clean(act->opaque); } @@ -80,11 +82,13 @@ void tran_commit(Transaction *tran) { TransactionAction *act, *next; - QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { + QSLIST_FOREACH(act, &tran->actions, entry) { if (act->drv->commit) { act->drv->commit(act->opaque); } + } + QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { if (act->drv->clean) { act->drv->clean(act->opaque); } From 82b54cf51656bf3cd5ed1ac549e8a1085a0e3290 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:04 +0100 Subject: [PATCH 35/76] block: Let replace_child_tran keep indirect pointer As of a future commit, bdrv_replace_child_noperm() will clear the indirect BdrvChild pointer passed to it if the new child BDS is NULL. bdrv_replace_child_tran() will want to let it do that, but revert this change in its abort handler. For that, we need to have it receive a BdrvChild ** pointer, too, and keep it stored in the BdrvReplaceChildState object that we attach to the transaction. Note that we do not need to store it in the BdrvReplaceChildState when new_bs is not NULL, because then there is nothing to revert. This is important so that bdrv_replace_node_noperm() can pass a pointer to a loop-local variable to bdrv_replace_child_tran() without worrying that this pointer will outlive one loop iteration. (Of course, for that to work, bdrv_replace_node_noperm() and in turn bdrv_replace_node() and its relatives may not be called with a NULL @to node. Luckily, they already are not, but now we should assert this.) bdrv_remove_file_or_backing_child() on the other hand needs to ensure that the indirect pointer it passes will stay valid for the duration of the transaction. Ensure this by keeping a strong reference to the BDS whose &bs->backing or &bs->file it passes to bdrv_replace_child_tran(), and giving up that reference only in the transaction .clean() handler. Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-9-hreitz@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-9-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 10 deletions(-) diff --git a/block.c b/block.c index 8da057f800..a40027161c 100644 --- a/block.c +++ b/block.c @@ -2254,6 +2254,7 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, typedef struct BdrvReplaceChildState { BdrvChild *child; + BdrvChild **childp; BlockDriverState *old_bs; } BdrvReplaceChildState; @@ -2269,7 +2270,29 @@ static void bdrv_replace_child_abort(void *opaque) BdrvReplaceChildState *s = opaque; BlockDriverState *new_bs = s->child->bs; - /* old_bs reference is transparently moved from @s to @s->child */ + /* + * old_bs reference is transparently moved from @s to s->child. + * + * Pass &s->child here instead of s->childp, because: + * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not + * modify the BdrvChild * pointer we indirectly pass to it, i.e. it + * will not modify s->child. From that perspective, it does not matter + * whether we pass s->childp or &s->child. + * (TODO: Right now, bdrv_replace_child_noperm() never modifies that + * pointer anyway (though it will in the future), so at this point it + * absolutely does not matter whether we pass s->childp or &s->child.) + * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use + * it here. + * (3) If new_bs is NULL, *s->childp will have been NULLed by + * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we + * must not pass a NULL *s->childp here. + * (TODO: In its current state, bdrv_replace_child_noperm() will not + * have NULLed *s->childp, so this does not apply yet. It will in the + * future.) + * + * So whether new_bs was NULL or not, we cannot pass s->childp here; and in + * any case, there is no reason to pass it anyway. + */ bdrv_replace_child_noperm(&s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2286,22 +2309,32 @@ static TransactionActionDrv bdrv_replace_child_drv = { * Note: real unref of old_bs is done only on commit. * * The function doesn't update permissions, caller is responsible for this. + * + * Note that if new_bs == NULL, @childp is stored in a state object attached + * to @tran, so that the old child can be reinstated in the abort handler. + * Therefore, if @new_bs can be NULL, @childp must stay valid until the + * transaction is committed or aborted. + * + * (TODO: The reinstating does not happen yet, but it will once + * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.) */ -static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, +static void bdrv_replace_child_tran(BdrvChild **childp, + BlockDriverState *new_bs, Transaction *tran) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); *s = (BdrvReplaceChildState) { - .child = child, - .old_bs = child->bs, + .child = *childp, + .childp = new_bs == NULL ? childp : NULL, + .old_bs = (*childp)->bs, }; tran_add(tran, &bdrv_replace_child_drv, s); if (new_bs) { bdrv_ref(new_bs); } - bdrv_replace_child_noperm(&child, new_bs); - /* old_bs reference is transparently moved from @child to @s */ + bdrv_replace_child_noperm(childp, new_bs); + /* old_bs reference is transparently moved from *childp to @s */ } /* @@ -4844,6 +4877,7 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) typedef struct BdrvRemoveFilterOrCowChild { BdrvChild *child; + BlockDriverState *bs; bool is_backing; } BdrvRemoveFilterOrCowChild; @@ -4873,10 +4907,19 @@ static void bdrv_remove_filter_or_cow_child_commit(void *opaque) bdrv_child_free(s->child); } +static void bdrv_remove_filter_or_cow_child_clean(void *opaque) +{ + BdrvRemoveFilterOrCowChild *s = opaque; + + /* Drop the bs reference after the transaction is done */ + bdrv_unref(s->bs); + g_free(s); +} + static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { .abort = bdrv_remove_filter_or_cow_child_abort, .commit = bdrv_remove_filter_or_cow_child_commit, - .clean = g_free, + .clean = bdrv_remove_filter_or_cow_child_clean, }; /* @@ -4894,6 +4937,11 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, return; } + /* + * Keep a reference to @bs so @childp will stay valid throughout the + * transaction (required by bdrv_replace_child_tran()) + */ + bdrv_ref(bs); if (child == bs->backing) { childp = &bs->backing; } else if (child == bs->file) { @@ -4903,12 +4951,13 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, } if (child->bs) { - bdrv_replace_child_tran(*childp, NULL, tran); + bdrv_replace_child_tran(childp, NULL, tran); } s = g_new(BdrvRemoveFilterOrCowChild, 1); *s = (BdrvRemoveFilterOrCowChild) { .child = child, + .bs = bs, .is_backing = (childp == &bs->backing), }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); @@ -4934,6 +4983,8 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, { BdrvChild *c, *next; + assert(to != NULL); + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); if (!should_update_child(c, to)) { @@ -4949,7 +5000,12 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, c->name, from->node_name); return -EPERM; } - bdrv_replace_child_tran(c, to, tran); + + /* + * Passing a pointer to the local variable @c is fine here, because + * @to is not NULL, and so &c will not be attached to the transaction. + */ + bdrv_replace_child_tran(&c, to, tran); } return 0; @@ -4964,6 +5020,8 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, * * With @detach_subchain=true @to must be in a backing chain of @from. In this * case backing link of the cow-parent of @to is removed. + * + * @to must not be NULL. */ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to, @@ -4976,6 +5034,8 @@ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to_cow_parent = NULL; int ret; + assert(to != NULL); + if (detach_subchain) { assert(bdrv_chain_contains(from, to)); assert(from != to); @@ -5031,6 +5091,9 @@ out: return ret; } +/** + * Replace node @from by @to (where neither may be NULL). + */ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp) { @@ -5098,7 +5161,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_drained_begin(old_bs); bdrv_drained_begin(new_bs); - bdrv_replace_child_tran(child, new_bs, tran); + bdrv_replace_child_tran(&child, new_bs, tran); found = g_hash_table_new(NULL, NULL); refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); From b0a9f6fed3d80de610dcd04a7e66f9f30a04174f Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:05 +0100 Subject: [PATCH 36/76] block: Let replace_child_noperm free children In most of the block layer, especially when traversing down from other BlockDriverStates, we assume that BdrvChild.bs can never be NULL. When it becomes NULL, it is expected that the corresponding BdrvChild pointer also becomes NULL and the BdrvChild object is freed. Therefore, once bdrv_replace_child_noperm() sets the BdrvChild.bs pointer to NULL, it should also immediately set the corresponding BdrvChild pointer (like bs->file or bs->backing) to NULL. In that context, it also makes sense for this function to free the child. Sometimes we cannot do so, though, because it is called in a transactional context where the caller might still want to reinstate the child in the abort branch (and free it only on commit), so this behavior has to remain optional. In bdrv_replace_child_tran()'s abort handler, we now rely on the fact that the BdrvChild passed to bdrv_replace_child_tran() must have had a non-NULL .bs pointer initially. Make a note of that and assert it. Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-10-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-10-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- block.c | 102 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 23 deletions(-) diff --git a/block.c b/block.c index a40027161c..0ac5b163d2 100644 --- a/block.c +++ b/block.c @@ -87,8 +87,10 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static bool bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child); +static void bdrv_child_free(BdrvChild *child); static void bdrv_replace_child_noperm(BdrvChild **child, - BlockDriverState *new_bs); + BlockDriverState *new_bs, + bool free_empty_child); static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, Transaction *tran); @@ -2256,12 +2258,16 @@ typedef struct BdrvReplaceChildState { BdrvChild *child; BdrvChild **childp; BlockDriverState *old_bs; + bool free_empty_child; } BdrvReplaceChildState; static void bdrv_replace_child_commit(void *opaque) { BdrvReplaceChildState *s = opaque; + if (s->free_empty_child && !s->child->bs) { + bdrv_child_free(s->child); + } bdrv_unref(s->old_bs); } @@ -2278,22 +2284,26 @@ static void bdrv_replace_child_abort(void *opaque) * modify the BdrvChild * pointer we indirectly pass to it, i.e. it * will not modify s->child. From that perspective, it does not matter * whether we pass s->childp or &s->child. - * (TODO: Right now, bdrv_replace_child_noperm() never modifies that - * pointer anyway (though it will in the future), so at this point it - * absolutely does not matter whether we pass s->childp or &s->child.) * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use * it here. * (3) If new_bs is NULL, *s->childp will have been NULLed by * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we * must not pass a NULL *s->childp here. - * (TODO: In its current state, bdrv_replace_child_noperm() will not - * have NULLed *s->childp, so this does not apply yet. It will in the - * future.) * * So whether new_bs was NULL or not, we cannot pass s->childp here; and in * any case, there is no reason to pass it anyway. */ - bdrv_replace_child_noperm(&s->child, s->old_bs); + bdrv_replace_child_noperm(&s->child, s->old_bs, true); + /* + * The child was pre-existing, so s->old_bs must be non-NULL, and + * s->child thus must not have been freed + */ + assert(s->child != NULL); + if (!new_bs) { + /* As described above, *s->childp was cleared, so restore it */ + assert(s->childp != NULL); + *s->childp = s->child; + } bdrv_unref(new_bs); } @@ -2310,30 +2320,44 @@ static TransactionActionDrv bdrv_replace_child_drv = { * * The function doesn't update permissions, caller is responsible for this. * + * (*childp)->bs must not be NULL. + * * Note that if new_bs == NULL, @childp is stored in a state object attached * to @tran, so that the old child can be reinstated in the abort handler. * Therefore, if @new_bs can be NULL, @childp must stay valid until the * transaction is committed or aborted. * - * (TODO: The reinstating does not happen yet, but it will once - * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.) + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed (on commit). @free_empty_child should only be false if the + * caller will free the BDrvChild themselves (which may be important + * if this is in turn called in another transactional context). */ static void bdrv_replace_child_tran(BdrvChild **childp, BlockDriverState *new_bs, - Transaction *tran) + Transaction *tran, + bool free_empty_child) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); *s = (BdrvReplaceChildState) { .child = *childp, .childp = new_bs == NULL ? childp : NULL, .old_bs = (*childp)->bs, + .free_empty_child = free_empty_child, }; tran_add(tran, &bdrv_replace_child_drv, s); + /* The abort handler relies on this */ + assert(s->old_bs != NULL); + if (new_bs) { bdrv_ref(new_bs); } - bdrv_replace_child_noperm(childp, new_bs); + /* + * Pass free_empty_child=false, we will free the child (if + * necessary) in bdrv_replace_child_commit() (if our + * @free_empty_child parameter was true). + */ + bdrv_replace_child_noperm(childp, new_bs, false); /* old_bs reference is transparently moved from *childp to @s */ } @@ -2705,8 +2729,22 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } +/** + * Replace (*childp)->bs by @new_bs. + * + * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents + * generally cannot handle a BdrvChild with .bs == NULL, so clearing + * BdrvChild.bs should generally immediately be followed by the + * BdrvChild pointer being cleared as well. + * + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed. @free_empty_child should only be false if the caller will + * free the BdrvChild themselves (this may be important in a + * transactional context, where it may only be freed on commit). + */ static void bdrv_replace_child_noperm(BdrvChild **childp, - BlockDriverState *new_bs) + BlockDriverState *new_bs, + bool free_empty_child) { BdrvChild *child = *childp; BlockDriverState *old_bs = child->bs; @@ -2743,6 +2781,9 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, } child->bs = new_bs; + if (!new_bs) { + *childp = NULL; + } if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); @@ -2772,6 +2813,10 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, bdrv_parent_drained_end_single(child); drain_saldo++; } + + if (free_empty_child && !child->bs) { + bdrv_child_free(child); + } } /** @@ -2801,7 +2846,14 @@ static void bdrv_attach_child_common_abort(void *opaque) BdrvChild *child = *s->child; BlockDriverState *bs = child->bs; - bdrv_replace_child_noperm(s->child, NULL); + /* + * Pass free_empty_child=false, because we still need the child + * for the AioContext operations on the parent below; those + * BdrvChildClass methods all work on a BdrvChild object, so we + * need to keep it as an empty shell (after this function, it will + * not be attached to any parent, and it will not have a .bs). + */ + bdrv_replace_child_noperm(s->child, NULL, false); if (bdrv_get_aio_context(bs) != s->old_child_ctx) { bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); @@ -2823,7 +2875,6 @@ static void bdrv_attach_child_common_abort(void *opaque) bdrv_unref(bs); bdrv_child_free(child); - *s->child = NULL; } static TransactionActionDrv bdrv_attach_child_common_drv = { @@ -2901,7 +2952,9 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, } bdrv_ref(child_bs); - bdrv_replace_child_noperm(&new_child, child_bs); + bdrv_replace_child_noperm(&new_child, child_bs, true); + /* child_bs was non-NULL, so new_child must not have been freed */ + assert(new_child != NULL); *child = new_child; @@ -2960,8 +3013,7 @@ static void bdrv_detach_child(BdrvChild **childp) { BlockDriverState *old_bs = (*childp)->bs; - bdrv_replace_child_noperm(childp, NULL); - bdrv_child_free(*childp); + bdrv_replace_child_noperm(childp, NULL, true); if (old_bs) { /* @@ -4951,7 +5003,11 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, } if (child->bs) { - bdrv_replace_child_tran(childp, NULL, tran); + /* + * Pass free_empty_child=false, we will free the child in + * bdrv_remove_filter_or_cow_child_commit() + */ + bdrv_replace_child_tran(childp, NULL, tran, false); } s = g_new(BdrvRemoveFilterOrCowChild, 1); @@ -4961,8 +5017,6 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, .is_backing = (childp == &bs->backing), }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); - - *childp = NULL; } /* @@ -5005,7 +5059,7 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, * Passing a pointer to the local variable @c is fine here, because * @to is not NULL, and so &c will not be attached to the transaction. */ - bdrv_replace_child_tran(&c, to, tran); + bdrv_replace_child_tran(&c, to, tran, true); } return 0; @@ -5161,7 +5215,9 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_drained_begin(old_bs); bdrv_drained_begin(new_bs); - bdrv_replace_child_tran(&child, new_bs, tran); + bdrv_replace_child_tran(&child, new_bs, tran, true); + /* @new_bs must have been non-NULL, so @child must not have been freed */ + assert(child != NULL); found = g_hash_table_new(NULL, NULL); refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); From 16e29cc050516ac0915f0db9226079b17dcbcc51 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Mon, 15 Nov 2021 15:54:06 +0100 Subject: [PATCH 37/76] iotests/030: Unthrottle parallel jobs in reverse See the comment for why this is necessary. Signed-off-by: Hanna Reitz Message-Id: <20211111120829.81329-11-hreitz@redhat.com> Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-11-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- tests/qemu-iotests/030 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 5fb65b4bef..567bf1da67 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -251,7 +251,16 @@ class TestParallelOps(iotests.QMPTestCase): speed=1024) self.assert_qmp(result, 'return', {}) - for job in pending_jobs: + # Do this in reverse: After unthrottling them, some jobs may finish + # before we have unthrottled all of them. This will drain their + # subgraph, and this will make jobs above them advance (despite those + # jobs on top being throttled). In the worst case, all jobs below the + # top one are finished before we can unthrottle it, and this makes it + # advance so far that it completes before we can unthrottle it - which + # results in an error. + # Starting from the top (i.e. in reverse) does not have this problem: + # When a job finishes, the ones below it are not advanced. + for job in reversed(pending_jobs): result = self.vm.qmp('block-job-set-speed', device=job, speed=0) self.assert_qmp(result, 'return', {}) From 4d8b0f0a95361b1a3888f3eb3f76b27420383753 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 15 Nov 2021 15:54:07 +0100 Subject: [PATCH 38/76] docs: Deprecate incorrectly typed device_add arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While introducing a non-QemuOpts code path for device creation for JSON -device, we noticed that QMP device_add doesn't check its input correctly (accepting arguments that should have been rejected), and that users may be relying on this behaviour (libvirt did until it was fixed recently). Let's use a deprecation period before we fix this bug in QEMU to avoid nasty surprises for users. Signed-off-by: Kevin Wolf Message-Id: <20211111143530.18985-1-kwolf@redhat.com> Reviewed-by: Markus Armbruster Reviewed-by: Daniel P. Berrangé Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-12-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- docs/about/deprecated.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 600031210d..c03fcf951f 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -250,6 +250,20 @@ options are removed in favor of using explicit ``blockdev-create`` and ``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for details. +Incorrectly typed ``device_add`` arguments (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Due to shortcomings in the internal implementation of ``device_add``, QEMU +incorrectly accepts certain invalid arguments: Any object or list arguments are +silently ignored. Other argument types are not checked, but an implicit +conversion happens, so that e.g. string values can be assigned to integer +device properties or vice versa. + +This is a bug in QEMU that will be fixed in the future so that previously +accepted incorrect commands will return an error. Users should make sure that +all arguments passed to ``device_add`` are consistent with the documented +property types. + System accelerators ------------------- From c9d4e42a8febe4db22eed8463087b38c3c74fd4c Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 15 Nov 2021 15:54:09 +0100 Subject: [PATCH 39/76] softmmu/qdev-monitor: fix use-after-free in qdev_set_id() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by Coverity (CID 1465222). Fixes: 4a1d937796de0fecd8b22d7dbebf87f38e8282fd ("softmmu/qdev-monitor: add error handling in qdev_set_id") Cc: Damien Hedde Cc: Kevin Wolf Cc: Michael S. Tsirkin Signed-off-by: Stefan Hajnoczi Message-Id: <20211102163342.31162-1-stefanha@redhat.com> Reviewed-by: Kevin Wolf Reviewed-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Marc-André Lureau Reviewed-by: Damien Hedde Reviewed-by: Markus Armbruster Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-14-kwolf@redhat.com> Signed-off-by: Hanna Reitz --- softmmu/qdev-monitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index b5aaae4b8c..01ec420e61 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -593,8 +593,8 @@ const char *qdev_set_id(DeviceState *dev, char *id, Error **errp) if (prop) { dev->id = id; } else { - g_free(id); error_setg(errp, "Duplicate device ID '%s'", id); + g_free(id); return NULL; } } else { From 5dbd0ce115c7720268e653fe928bb6a0c1314a80 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 16 Nov 2021 11:14:31 +0100 Subject: [PATCH 40/76] file-posix: Fix alignment after reopen changing O_DIRECT At the end of a reopen, we already call bdrv_refresh_limits(), which should update bs->request_alignment according to the new file descriptor. However, raw_probe_alignment() relies on s->needs_alignment and just uses 1 if it isn't set. We neglected to update this field, so starting with cache=writeback and then reopening with cache=none means that we get an incorrect bs->request_alignment == 1 and unaligned requests fail instead of being automatically aligned. Fix this by recalculating s->needs_alignment in raw_refresh_limits() before calling raw_probe_alignment(). Signed-off-by: Kevin Wolf Message-Id: <20211104113109.56336-1-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Stefano Garzarella Signed-off-by: Kevin Wolf Message-Id: <20211115145409.176785-13-kwolf@redhat.com> [hreitz: Fix iotest 142 for block sizes greater than 512 by operating on a file with a size of 1 MB] Signed-off-by: Hanna Reitz Message-Id: <20211116101431.105252-1-hreitz@redhat.com> --- block/file-posix.c | 20 ++++++++++++++++---- tests/qemu-iotests/142 | 29 +++++++++++++++++++++++++++++ tests/qemu-iotests/142.out | 18 ++++++++++++++++++ 3 files changed, 63 insertions(+), 4 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 7a27c83060..b283093e5b 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -167,6 +167,7 @@ typedef struct BDRVRawState { int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; + bool force_alignment; bool drop_cache; bool check_cache_dropped; struct { @@ -351,6 +352,17 @@ static bool dio_byte_aligned(int fd) return false; } +static bool raw_needs_alignment(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { + return true; + } + + return s->force_alignment; +} + /* Check if read is allowed with given memory buffer and length. * * This function is used to check O_DIRECT memory buffer and request alignment. @@ -728,9 +740,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->has_discard = true; s->has_write_zeroes = true; - if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { - s->needs_alignment = true; - } if (fstat(s->fd, &st) < 0) { ret = -errno; @@ -784,9 +793,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, * so QEMU makes sure all IO operations on the device are aligned * to sector size, or else FreeBSD will reject them with EINVAL. */ - s->needs_alignment = true; + s->force_alignment = true; } #endif + s->needs_alignment = raw_needs_alignment(bs); #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { @@ -1251,7 +1261,9 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) BDRVRawState *s = bs->opaque; struct stat st; + s->needs_alignment = raw_needs_alignment(bs); raw_probe_alignment(bs, s->fd, errp); + bs->bl.min_mem_alignment = s->buf_align; bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); diff --git a/tests/qemu-iotests/142 b/tests/qemu-iotests/142 index 69fd10ef51..86d65a2d1a 100755 --- a/tests/qemu-iotests/142 +++ b/tests/qemu-iotests/142 @@ -350,6 +350,35 @@ info block backing-file" echo "$hmp_cmds" | run_qemu -drive "$files","$ids" | grep "Cache" +echo +echo "--- Alignment after changing O_DIRECT ---" +echo + +# Directly test the protocol level: Can unaligned requests succeed even if +# O_DIRECT was only enabled through a reopen and vice versa? + +# Ensure image size is a multiple of the sector size (required for O_DIRECT) +$QEMU_IMG create -f file "$TEST_IMG" 1M | _filter_img_create + +# And write some data (not strictly necessary, but it feels better to actually +# have something to be read) +$QEMU_IO -f file -c 'write 0 4096' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO --cache=writeback -f file $TEST_IMG < Date: Mon, 15 Nov 2021 16:39:43 -0600 Subject: [PATCH 41/76] nbd/server: Silence clang sanitizer warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clang's sanitizer is picky: memset(NULL, x, 0) is technically undefined behavior, even though no sane implementation of memset() deferences the NULL. Caught by the nbd-qemu-allocation iotest. The alternative to checking before each memset is to instead force an allocation of 1 element instead of g_new0(type, 0)'s behavior of returning NULL for a 0-length array. Reported-by: Peter Maydell Fixes: 3b1f244c59 (nbd: Allow export of multiple bitmaps for one device) Signed-off-by: Eric Blake Message-Id: <20211115223943.626416-1-eblake@redhat.com> Reviewed-by: Philippe Mathieu-Daudé --- nbd/server.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 6d03e8a4b4..d9164ee6d0 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2020 Red Hat, Inc. + * Copyright (C) 2016-2021 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori * * Network Block Device Server Side @@ -879,7 +879,9 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta, if (!*query) { if (client->opt == NBD_OPT_LIST_META_CONTEXT) { meta->allocation_depth = meta->exp->allocation_depth; - memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); + if (meta->exp->nr_export_bitmaps) { + memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); + } } trace_nbd_negotiate_meta_query_parse("empty"); return true; @@ -894,7 +896,8 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta, if (nbd_strshift(&query, "dirty-bitmap:")) { trace_nbd_negotiate_meta_query_parse("dirty-bitmap:"); if (!*query) { - if (client->opt == NBD_OPT_LIST_META_CONTEXT) { + if (client->opt == NBD_OPT_LIST_META_CONTEXT && + meta->exp->nr_export_bitmaps) { memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); } trace_nbd_negotiate_meta_query_parse("empty"); @@ -1024,7 +1027,9 @@ static int nbd_negotiate_meta_queries(NBDClient *client, /* enable all known contexts */ meta->base_allocation = true; meta->allocation_depth = meta->exp->allocation_depth; - memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); + if (meta->exp->nr_export_bitmaps) { + memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); + } } else { for (i = 0; i < nb_queries; ++i) { ret = nbd_negotiate_meta_query(client, meta, errp); From 3d212b41e9ccb3f37d04f22c59a960bac099c1d4 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Mon, 15 Nov 2021 14:29:43 -0600 Subject: [PATCH 42/76] nbd/server: Add --selinux-label option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Under SELinux, Unix domain sockets have two labels. One is on the disk and can be set with commands such as chcon(1). There is a different label stored in memory (called the process label). This can only be set by the process creating the socket. When using SELinux + SVirt and wanting qemu to be able to connect to a qemu-nbd instance, you must set both labels correctly first. For qemu-nbd the options to set the second label are awkward. You can create the socket in a wrapper program and then exec into qemu-nbd. Or you could try something with LD_PRELOAD. This commit adds the ability to set the label straightforwardly on the command line, via the new --selinux-label flag. (The name of the flag is the same as the equivalent nbdkit option.) A worked example showing how to use the new option can be found in this bug: https://bugzilla.redhat.com/show_bug.cgi?id=1984938 Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1984938 Signed-off-by: Richard W.M. Jones Reviewed-by: Daniel P. Berrangé [eblake: rebase to configure changes, reject --selinux-label if it is not compiled in or not used on a Unix socket] Note that we may relax some of these restrictions at a later date, such as making it possible to label a TCP socket, although it may be smarter to do so as a generic QMP action rather than more one-off command lines in qemu-nbd. Signed-off-by: Eric Blake Message-Id: <20211115202944.615966-1-eblake@redhat.com> Reviewed-by: Thomas Huth [eblake: adjust meson output as suggested by thuth] Signed-off-by: Eric Blake --- meson.build | 10 +++- meson_options.txt | 3 ++ qemu-nbd.c | 46 +++++++++++++++++++ scripts/meson-buildoptions.sh | 3 ++ tests/docker/dockerfiles/centos8.docker | 1 + .../dockerfiles/fedora-i386-cross.docker | 1 + tests/docker/dockerfiles/fedora.docker | 1 + tests/docker/dockerfiles/opensuse-leap.docker | 1 + tests/docker/dockerfiles/ubuntu1804.docker | 1 + tests/docker/dockerfiles/ubuntu2004.docker | 1 + 10 files changed, 67 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 2ece4fe088..084806a941 100644 --- a/meson.build +++ b/meson.build @@ -1201,6 +1201,11 @@ keyutils = dependency('libkeyutils', required: false, has_gettid = cc.has_function('gettid') +# libselinux +selinux = dependency('libselinux', + required: get_option('selinux'), + method: 'pkg-config', kwargs: static_kwargs) + # Malloc tests malloc = [] @@ -1479,6 +1484,7 @@ config_host_data.set('CONFIG_SPICE_PROTOCOL', spice_protocol.found()) config_host_data.set('CONFIG_SPICE', spice.found()) config_host_data.set('CONFIG_X11', x11.found()) config_host_data.set('CONFIG_CFI', get_option('cfi')) +config_host_data.set('CONFIG_SELINUX', selinux.found()) config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version())) config_host_data.set('QEMU_VERSION_MAJOR', meson.project_version().split('.')[0]) config_host_data.set('QEMU_VERSION_MINOR', meson.project_version().split('.')[1]) @@ -3054,7 +3060,8 @@ if have_tools qemu_io = executable('qemu-io', files('qemu-io.c'), dependencies: [block, qemuutil], install: true) qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'), - dependencies: [blockdev, qemuutil, gnutls], install: true) + dependencies: [blockdev, qemuutil, gnutls, selinux], + install: true) subdir('storage-daemon') subdir('contrib/rdmacm-mux') @@ -3430,6 +3437,7 @@ summary_info += {'libdaxctl support': libdaxctl} summary_info += {'libudev': libudev} # Dummy dependency, keep .found() summary_info += {'FUSE lseek': fuse_lseek.found()} +summary_info += {'selinux': selinux} summary(summary_info, bool_yn: true, section: 'Dependencies') if not supported_cpus.contains(cpu) diff --git a/meson_options.txt b/meson_options.txt index 411952bc91..e392323732 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -201,3 +201,6 @@ option('slirp', type: 'combo', value: 'auto', option('fdt', type: 'combo', value: 'auto', choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], description: 'Whether and how to find the libfdt library') + +option('selinux', type: 'feature', value: 'auto', + description: 'SELinux support in qemu-nbd') diff --git a/qemu-nbd.c b/qemu-nbd.c index 9d895ba24b..c6c20df68a 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -47,6 +47,10 @@ #include "trace/control.h" #include "qemu-version.h" +#ifdef CONFIG_SELINUX +#include +#endif + #ifdef __linux__ #define HAVE_NBD_DEVICE 1 #else @@ -64,6 +68,7 @@ #define QEMU_NBD_OPT_FORK 263 #define QEMU_NBD_OPT_TLSAUTHZ 264 #define QEMU_NBD_OPT_PID_FILE 265 +#define QEMU_NBD_OPT_SELINUX_LABEL 266 #define MBR_SIZE 512 @@ -116,6 +121,9 @@ static void usage(const char *name) " --fork fork off the server process and exit the parent\n" " once the server is running\n" " --pid-file=PATH store the server's process ID in the given file\n" +#ifdef CONFIG_SELINUX +" --selinux-label=LABEL set SELinux process label on listening socket\n" +#endif #if HAVE_NBD_DEVICE "\n" "Kernel NBD client support:\n" @@ -454,6 +462,7 @@ static const char *socket_activation_validate_opts(const char *device, const char *sockpath, const char *address, const char *port, + const char *selinux, bool list) { if (device != NULL) { @@ -472,6 +481,10 @@ static const char *socket_activation_validate_opts(const char *device, return "TCP port number can't be set when using socket activation"; } + if (selinux != NULL) { + return "SELinux label can't be set when using socket activation"; + } + if (list) { return "List mode is incompatible with socket activation"; } @@ -534,6 +547,8 @@ int main(int argc, char **argv) { "trace", required_argument, NULL, 'T' }, { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK }, { "pid-file", required_argument, NULL, QEMU_NBD_OPT_PID_FILE }, + { "selinux-label", required_argument, NULL, + QEMU_NBD_OPT_SELINUX_LABEL }, { NULL, 0, NULL, 0 } }; int ch; @@ -560,6 +575,7 @@ int main(int argc, char **argv) int old_stderr = -1; unsigned socket_activation; const char *pid_file_name = NULL; + const char *selinux_label = NULL; BlockExportOptions *export_opts; #ifdef CONFIG_POSIX @@ -749,6 +765,9 @@ int main(int argc, char **argv) case QEMU_NBD_OPT_PID_FILE: pid_file_name = optarg; break; + case QEMU_NBD_OPT_SELINUX_LABEL: + selinux_label = optarg; + break; } } @@ -788,6 +807,7 @@ int main(int argc, char **argv) /* Using socket activation - check user didn't use -p etc. */ const char *err_msg = socket_activation_validate_opts(device, sockpath, bindto, port, + selinux_label, list); if (err_msg != NULL) { error_report("%s", err_msg); @@ -827,6 +847,18 @@ int main(int argc, char **argv) } } + if (selinux_label) { +#ifdef CONFIG_SELINUX + if (sockpath == NULL && device == NULL) { + error_report("--selinux-label is not permitted without --socket"); + exit(EXIT_FAILURE); + } +#else + error_report("SELinux support not enabled in this binary"); + exit(EXIT_FAILURE); +#endif + } + if (list) { saddr = nbd_build_socket_address(sockpath, bindto, port); return qemu_nbd_client_list(saddr, tlscreds, bindto); @@ -940,6 +972,13 @@ int main(int argc, char **argv) } else { backlog = MIN(shared, SOMAXCONN); } +#ifdef CONFIG_SELINUX + if (selinux_label && setsockcreatecon_raw(selinux_label) == -1) { + error_report("Cannot set SELinux socket create context to %s: %s", + selinux_label, strerror(errno)); + exit(EXIT_FAILURE); + } +#endif saddr = nbd_build_socket_address(sockpath, bindto, port); if (qio_net_listener_open_sync(server, saddr, backlog, &local_err) < 0) { @@ -947,6 +986,13 @@ int main(int argc, char **argv) error_report_err(local_err); exit(EXIT_FAILURE); } +#ifdef CONFIG_SELINUX + if (selinux_label && setsockcreatecon_raw(NULL) == -1) { + error_report("Cannot clear SELinux socket create context: %s", + strerror(errno)); + exit(EXIT_FAILURE); + } +#endif } else { size_t i; /* See comment in check_socket_activation above. */ diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 45e1f2e20d..7a17ff4218 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -72,6 +72,7 @@ meson_options_help() { printf "%s\n" ' sdl SDL user interface' printf "%s\n" ' sdl-image SDL Image support for icons' printf "%s\n" ' seccomp seccomp support' + printf "%s\n" ' selinux SELinux support in qemu-nbd' printf "%s\n" ' smartcard CA smartcard emulation support' printf "%s\n" ' snappy snappy compression support' printf "%s\n" ' sparse sparse checker' @@ -215,6 +216,8 @@ _meson_option_parse() { --disable-sdl-image) printf "%s" -Dsdl_image=disabled ;; --enable-seccomp) printf "%s" -Dseccomp=enabled ;; --disable-seccomp) printf "%s" -Dseccomp=disabled ;; + --enable-selinux) printf "%s" -Dselinux=enabled ;; + --disable-selinux) printf "%s" -Dselinux=disabled ;; --enable-slirp) printf "%s" -Dslirp=enabled ;; --disable-slirp) printf "%s" -Dslirp=disabled ;; --enable-slirp=*) quote_sh "-Dslirp=$2" ;; diff --git a/tests/docker/dockerfiles/centos8.docker b/tests/docker/dockerfiles/centos8.docker index 46398c61ee..7f135f8e8c 100644 --- a/tests/docker/dockerfiles/centos8.docker +++ b/tests/docker/dockerfiles/centos8.docker @@ -51,6 +51,7 @@ ENV PACKAGES \ libpng-devel \ librbd-devel \ libseccomp-devel \ + libselinux-devel \ libslirp-devel \ libssh-devel \ libtasn1-devel \ diff --git a/tests/docker/dockerfiles/fedora-i386-cross.docker b/tests/docker/dockerfiles/fedora-i386-cross.docker index f62a71ce22..13328e6081 100644 --- a/tests/docker/dockerfiles/fedora-i386-cross.docker +++ b/tests/docker/dockerfiles/fedora-i386-cross.docker @@ -8,6 +8,7 @@ ENV PACKAGES \ gcc \ git \ libffi-devel.i686 \ + libselinux-devel.i686 \ libtasn1-devel.i686 \ libzstd-devel.i686 \ make \ diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index eec1add7f6..c6fd7e1113 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -53,6 +53,7 @@ ENV PACKAGES \ libpng-devel \ librbd-devel \ libseccomp-devel \ + libselinux-devel \ libslirp-devel \ libssh-devel \ libtasn1-devel \ diff --git a/tests/docker/dockerfiles/opensuse-leap.docker b/tests/docker/dockerfiles/opensuse-leap.docker index 5a8bee0289..3bbdb67f4f 100644 --- a/tests/docker/dockerfiles/opensuse-leap.docker +++ b/tests/docker/dockerfiles/opensuse-leap.docker @@ -55,6 +55,7 @@ ENV PACKAGES \ libpulse-devel \ librbd-devel \ libseccomp-devel \ + libselinux-devel \ libspice-server-devel \ libssh-devel \ libtasn1-devel \ diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker index 0880bf3e29..450fd06d0d 100644 --- a/tests/docker/dockerfiles/ubuntu1804.docker +++ b/tests/docker/dockerfiles/ubuntu1804.docker @@ -60,6 +60,7 @@ ENV PACKAGES \ libsdl2-dev \ libsdl2-image-dev \ libseccomp-dev \ + libselinux-dev \ libsnappy-dev \ libspice-protocol-dev \ libspice-server-dev \ diff --git a/tests/docker/dockerfiles/ubuntu2004.docker b/tests/docker/dockerfiles/ubuntu2004.docker index 39de63d012..15a026be09 100644 --- a/tests/docker/dockerfiles/ubuntu2004.docker +++ b/tests/docker/dockerfiles/ubuntu2004.docker @@ -60,6 +60,7 @@ ENV PACKAGES \ libsdl2-dev \ libsdl2-image-dev \ libseccomp-dev \ + libselinux-dev \ libslirp-dev \ libsnappy-dev \ libspice-protocol-dev \ From d47e3751b5b642b9a3d9604f48717a51f1cb0c01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 15 Nov 2021 14:29:10 +0000 Subject: [PATCH 43/76] tests/docker: force NOUSER=1 for base images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As base images are often used to build further images like toolchains ensure we don't add the local user by accident. The local user should only exist on local images and not anything that gets pushed up to the public registry. Reported-by: Richard Henderson Signed-off-by: Alex Bennée Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20211115142915.3797652-2-alex.bennee@linaro.org> --- tests/docker/Makefile.include | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include index 7a63a3b7f7..f1a0c5db7a 100644 --- a/tests/docker/Makefile.include +++ b/tests/docker/Makefile.include @@ -150,6 +150,9 @@ docker-image-debian-sparc64-cross: docker-image-debian10 # The native build should never use the registry docker-image-debian-native: DOCKER_REGISTRY= +# base images should not add a local user +docker-image-debian10: NOUSER=1 +docker-image-debian11: NOUSER=1 # # The build rule for hexagon-cross is special in so far for most of From 81c9b06ea0bf7d5e0c0b00aae41df9060c063cfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 15 Nov 2021 14:29:11 +0000 Subject: [PATCH 44/76] tests/vm: sort the special variable list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Making the list alphabetical makes it easier to find the config option you are looking for. Signed-off-by: Alex Bennée Reviewed-by: Willian Rampazzo Reviewed-by: Richard Henderson Message-Id: <20211115142915.3797652-3-alex.bennee@linaro.org> --- tests/vm/Makefile.include | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include index f3a3a1c751..f8ca619cf2 100644 --- a/tests/vm/Makefile.include +++ b/tests/vm/Makefile.include @@ -52,21 +52,21 @@ endif @echo @echo "Special variables:" @echo " BUILD_TARGET=foo - Override the build target" - @echo " TARGET_LIST=a,b,c - Override target list in builds" - @echo ' EXTRA_CONFIGURE_OPTS="..."' - @echo " J=[0..9]* - Override the -jN parameter for make commands" @echo " DEBUG=1 - Enable verbose output on host and interactive debugging" + @echo ' EXTRA_CONFIGURE_OPTS="..." - Pass to configure step' + @echo " J=[0..9]* - Override the -jN parameter for make commands" @echo " LOG_CONSOLE=1 - Log console to file in: ~/.cache/qemu-vm " - @echo " V=1 - Enable verbose ouput on host and guest commands" - @echo " QEMU_LOCAL=1 - Use QEMU binary local to this build." @echo " QEMU=/path/to/qemu - Change path to QEMU binary" - @echo " QEMU_IMG=/path/to/qemu-img - Change path to qemu-img tool" ifeq ($(HAVE_PYTHON_YAML),yes) @echo " QEMU_CONFIG=/path/conf.yml - Change path to VM configuration .yml file." else @echo " (install python3-yaml to enable support for yaml file to configure a VM.)" endif @echo " See conf_example_*.yml for file format details." + @echo " QEMU_IMG=/path/to/qemu-img - Change path to qemu-img tool" + @echo " QEMU_LOCAL=1 - Use QEMU binary local to this build." + @echo " TARGET_LIST=a,b,c - Override target list in builds" + @echo " V=1 - Enable verbose ouput on host and guest commands" vm-build-all: $(addprefix vm-build-, $(IMAGES)) From ebd654aabc1a9128b2ea390faa23c2fb6b4b13b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 15 Nov 2021 14:29:12 +0000 Subject: [PATCH 45/76] tests/vm: don't build using TCG by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While it is useful to run these images using TCG their performance will not be anything like the native guests. Don't do it by default. Fixes: https://gitlab.com/qemu-project/qemu/-/issues/393 Signed-off-by: Alex Bennée Reviewed-by: Willian Rampazzo Reviewed-by: Richard Henderson Message-Id: <20211115142915.3797652-4-alex.bennee@linaro.org> --- tests/vm/Makefile.include | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include index f8ca619cf2..ae91f5043e 100644 --- a/tests/vm/Makefile.include +++ b/tests/vm/Makefile.include @@ -2,16 +2,24 @@ .PHONY: vm-build-all vm-clean-all +HOST_ARCH = $(if $(ARCH),$(ARCH),$(shell uname -m)) + EFI_AARCH64 = $(wildcard $(BUILD_DIR)/pc-bios/edk2-aarch64-code.fd) -IMAGES := freebsd netbsd openbsd centos fedora haiku.x86_64 +X86_IMAGES := freebsd netbsd openbsd centos fedora haiku.x86_64 ifneq ($(GENISOIMAGE),) -IMAGES += ubuntu.i386 centos +X86_IMAGES += ubuntu.i386 centos ifneq ($(EFI_AARCH64),) -IMAGES += ubuntu.aarch64 centos.aarch64 +ARM64_IMAGES += ubuntu.aarch64 centos.aarch64 endif endif +ifeq ($(HOST_ARCH),x86_64) +IMAGES=$(X86_IMAGES) $(if $(USE_TCG),$(ARM64_IMAGES)) +else ifeq ($(HOST_ARCH),aarch64) +IMAGES=$(ARM64_IMAGES) $(if $(USE_TCG),$(X86_IMAGES)) +endif + IMAGES_DIR := $(HOME)/.cache/qemu-vm/images IMAGE_FILES := $(patsubst %, $(IMAGES_DIR)/%.img, $(IMAGES)) @@ -43,7 +51,7 @@ else endif @echo " vm-build-haiku.x86_64 - Build QEMU in Haiku VM" @echo "" - @echo " vm-build-all - Build QEMU in all VMs" + @echo " vm-build-all - Build QEMU in: $(IMAGES)" @echo " vm-clean-all - Clean up VM images" @echo @echo "For trouble-shooting:" @@ -56,6 +64,7 @@ endif @echo ' EXTRA_CONFIGURE_OPTS="..." - Pass to configure step' @echo " J=[0..9]* - Override the -jN parameter for make commands" @echo " LOG_CONSOLE=1 - Log console to file in: ~/.cache/qemu-vm " + @echo " USE_TCG=1 - Use TCG for cross-arch images" @echo " QEMU=/path/to/qemu - Change path to QEMU binary" ifeq ($(HAVE_PYTHON_YAML),yes) @echo " QEMU_CONFIG=/path/conf.yml - Change path to VM configuration .yml file." From a399f9143e801efbc574495c7df0eba3bd729e9f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 15 Nov 2021 14:29:13 +0000 Subject: [PATCH 46/76] meson: remove useless libdl test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dlopen is never used after it is sought via cc.find_library, because plugins use gmodule instead; remove the test. Signed-off-by: Paolo Bonzini Reviewed-by: Thomas Huth Message-Id: <20211110092454.30916-1-pbonzini@redhat.com> Signed-off-by: Alex Bennée Reviewed-by: Richard Henderson Message-Id: <20211115142915.3797652-5-alex.bennee@linaro.org> --- accel/tcg/meson.build | 2 +- meson.build | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build index 137a1a44cc..7a0a79d731 100644 --- a/accel/tcg/meson.build +++ b/accel/tcg/meson.build @@ -10,7 +10,7 @@ tcg_ss.add(files( )) tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c')) tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c')) -tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl]) +tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')]) specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss) specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files( diff --git a/meson.build b/meson.build index 2ece4fe088..baeaee4522 100644 --- a/meson.build +++ b/meson.build @@ -566,13 +566,7 @@ endif spice_headers = spice.partial_dependency(compile_args: true, includes: true) rt = cc.find_library('rt', required: false) -libdl = not_found -if 'CONFIG_PLUGIN' in config_host - libdl = cc.find_library('dl', required: false) - if not cc.has_function('dlopen', dependencies: libdl) - error('dlopen not found') - endif -endif + libiscsi = not_found if not get_option('libiscsi').auto() or have_block libiscsi = dependency('libiscsi', version: '>=1.9.0', From d7c2e2b3f447133175be5817663147d1309bde2d Mon Sep 17 00:00:00 2001 From: Cleber Rosa Date: Mon, 15 Nov 2021 14:29:14 +0000 Subject: [PATCH 47/76] Jobs based on custom runners: add CentOS Stream 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces three different parts of a job designed to run on a custom runner managed by Red Hat. The goals include: a) propose a model for other organizations that want to onboard their own runners, with their specific platforms, build configuration and tests. b) bring awareness to the differences between upstream QEMU and the version available under CentOS Stream, which is "A preview of upcoming Red Hat Enterprise Linux minor and major releases". c) because of b), it should be easier to identify and reduce the gap between Red Hat's downstream and upstream QEMU. The components of this custom job are: I) OS build environment setup code: - additions to the existing "build-environment.yml" playbook that can be used to set up CentOS/EL 8 systems. - a CentOS Stream 8 specific "build-environment.yml" playbook that adds to the generic one. II) QEMU build configuration: a script that will produce binaries with features as similar as possible to the ones built and packaged on CentOS stream 8. III) Scripts that define the minimum amount of testing that the binaries built with the given configuration (point II) under the given OS build environment (point I) should be subjected to. IV) Job definition: GitLab CI jobs that will dispatch the build/test jobs (see points #II and #III) to the machine specifically configured according to #I. Signed-off-by: Cleber Rosa Signed-off-by: Alex Bennée Reviewed-by: Willian Rampazzo Tested-by: Willian Rampazzo Message-Id: <20211111160501.862396-2-crosa@redhat.com> Message-Id: <20211115142915.3797652-6-alex.bennee@linaro.org> --- .gitlab-ci.d/custom-runners.yml | 29 +++ docs/devel/ci-jobs.rst.inc | 7 + .../org.centos/stream/8/build-environment.yml | 51 +++++ .../ci/org.centos/stream/8/x86_64/configure | 208 ++++++++++++++++++ .../org.centos/stream/8/x86_64/test-avocado | 70 ++++++ scripts/ci/org.centos/stream/README | 17 ++ scripts/ci/setup/build-environment.yml | 38 ++++ 7 files changed, 420 insertions(+) create mode 100644 scripts/ci/org.centos/stream/8/build-environment.yml create mode 100755 scripts/ci/org.centos/stream/8/x86_64/configure create mode 100755 scripts/ci/org.centos/stream/8/x86_64/test-avocado create mode 100644 scripts/ci/org.centos/stream/README diff --git a/.gitlab-ci.d/custom-runners.yml b/.gitlab-ci.d/custom-runners.yml index a89a20da48..1f56297dfa 100644 --- a/.gitlab-ci.d/custom-runners.yml +++ b/.gitlab-ci.d/custom-runners.yml @@ -248,3 +248,32 @@ ubuntu-20.04-aarch64-notcg: - ../configure --disable-libssh --disable-tcg - make --output-sync -j`nproc` - make --output-sync -j`nproc` check V=1 + +centos-stream-8-x86_64: + allow_failure: true + needs: [] + stage: build + tags: + - centos_stream_8 + - x86_64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE" + artifacts: + name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + when: on_failure + expire_in: 7 days + paths: + - build/tests/results/latest/results.xml + - build/tests/results/latest/test-results + reports: + junit: build/tests/results/latest/results.xml + before_script: + - JOBS=$(expr $(nproc) + 1) + script: + - mkdir build + - cd build + - ../scripts/ci/org.centos/stream/8/x86_64/configure + - make -j"$JOBS" + - make NINJA=":" check + - ../scripts/ci/org.centos/stream/8/x86_64/test-avocado diff --git a/docs/devel/ci-jobs.rst.inc b/docs/devel/ci-jobs.rst.inc index 277975e4ad..db3f571d5f 100644 --- a/docs/devel/ci-jobs.rst.inc +++ b/docs/devel/ci-jobs.rst.inc @@ -49,3 +49,10 @@ S390X_RUNNER_AVAILABLE If you've got access to an IBM Z host that can be used as a gitlab-CI runner, you can set this variable to enable the tests that require this kind of host. The runner should be tagged with "s390x". + +CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you've got access to a CentOS Stream 8 x86_64 host that can be +used as a gitlab-CI runner, you can set this variable to enable the +tests that require this kind of host. The runner should be tagged with +both "centos_stream_8" and "x86_64". diff --git a/scripts/ci/org.centos/stream/8/build-environment.yml b/scripts/ci/org.centos/stream/8/build-environment.yml new file mode 100644 index 0000000000..42b0471634 --- /dev/null +++ b/scripts/ci/org.centos/stream/8/build-environment.yml @@ -0,0 +1,51 @@ +--- +- name: Installation of extra packages to build QEMU + hosts: all + tasks: + - name: Extra check for CentOS Stream 8 + lineinfile: + path: /etc/redhat-release + line: CentOS Stream release 8 + state: present + check_mode: yes + register: centos_stream_8 + + - name: Enable PowerTools repo on CentOS Stream 8 + ini_file: + path: /etc/yum.repos.d/CentOS-Stream-PowerTools.repo + section: powertools + option: enabled + value: "1" + when: + - ansible_facts['distribution'] == 'CentOS' + - ansible_facts['distribution_major_version'] == '8' + - centos_stream_8 + + - name: Install basic packages to build QEMU on CentOS Stream 8 + dnf: + name: + - device-mapper-multipath-devel + - glusterfs-api-devel + - gnutls-devel + - libcap-ng-devel + - libcurl-devel + - libfdt-devel + - libiscsi-devel + - libpmem-devel + - librados-devel + - librbd-devel + - libseccomp-devel + - libssh-devel + - libxkbcommon-devel + - ninja-build + - numactl-devel + - python3-sphinx + - redhat-rpm-config + - snappy-devel + - spice-server-devel + - systemd-devel + state: present + when: + - ansible_facts['distribution'] == 'CentOS' + - ansible_facts['distribution_major_version'] == '8' + - centos_stream_8 diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure new file mode 100755 index 0000000000..048e80dc49 --- /dev/null +++ b/scripts/ci/org.centos/stream/8/x86_64/configure @@ -0,0 +1,208 @@ +#!/bin/sh -e +# +# Configuration for QEMU based on CentOS Stream 8 x86_64 builds +# +# The "configure" command line is based on: +# +# https://git.centos.org/rpms/qemu-kvm/blob/c8s-stream-rhel/f/SPECS/qemu-kvm.spec +# +# But, because the SPEC file contains a number of conditionals and +# variable and expansions only available at RPM build time, this version +# was initially generated from an actual RPM build on an x86_64 platform. +# +# From that initial version, options that are required or are a +# consequence of non-upstream patches have been adapted. One example +# is "--without-default-devices" which is *not* present here, given +# that patches adding downstream specific devices are not available. +# +../configure \ +--prefix="/usr" \ +--libdir="/usr/lib64" \ +--datadir="/usr/share" \ +--sysconfdir="/etc" \ +--interp-prefix=/usr/qemu-%M \ +--localstatedir="/var" \ +--docdir="/usr/share/doc" \ +--libexecdir="/usr/libexec" \ +--extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ +--extra-cflags="-O2 -g -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions -fstack-protector-strong -grecord-gcc-switches -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection" \ +--with-suffix="qemu-kvm" \ +--firmwarepath=/usr/share/qemu-firmware \ +--with-git=meson \ +--with-git-submodules=update \ +--target-list="x86_64-softmmu" \ +--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \ +--audio-drv-list="" \ +--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \ +--with-coroutine=ucontext \ +--with-git=git \ +--tls-priority=@QEMU,SYSTEM \ +--disable-attr \ +--disable-auth-pam \ +--disable-avx2 \ +--disable-avx512f \ +--disable-bochs \ +--disable-bpf \ +--disable-brlapi \ +--disable-bsd-user \ +--disable-bzip2 \ +--disable-cap-ng \ +--disable-capstone \ +--disable-cfi \ +--disable-cfi-debug \ +--disable-cloop \ +--disable-cocoa \ +--disable-coroutine-pool \ +--disable-crypto-afalg \ +--disable-curl \ +--disable-curses \ +--disable-debug-info \ +--disable-debug-mutex \ +--disable-debug-tcg \ +--disable-dmg \ +--disable-docs \ +--disable-fuse \ +--disable-fuse-lseek \ +--disable-gcrypt \ +--disable-gio \ +--disable-glusterfs \ +--disable-gnutls \ +--disable-gtk \ +--disable-guest-agent \ +--disable-guest-agent-msi \ +--disable-hax \ +--disable-hvf \ +--disable-iconv \ +--disable-kvm \ +--disable-libdaxctl \ +--disable-libiscsi \ +--disable-libnfs \ +--disable-libpmem \ +--disable-libssh \ +--disable-libudev \ +--disable-libusb \ +--disable-libxml2 \ +--disable-linux-aio \ +--disable-linux-io-uring \ +--disable-linux-user \ +--disable-live-block-migration \ +--disable-lto \ +--disable-lzfse \ +--disable-lzo \ +--disable-malloc-trim \ +--disable-membarrier \ +--disable-modules \ +--disable-module-upgrades \ +--disable-mpath \ +--disable-multiprocess \ +--disable-netmap \ +--disable-nettle \ +--disable-numa \ +--disable-nvmm \ +--disable-opengl \ +--disable-parallels \ +--disable-pie \ +--disable-pvrdma \ +--disable-qcow1 \ +--disable-qed \ +--disable-qom-cast-debug \ +--disable-rbd \ +--disable-rdma \ +--disable-replication \ +--disable-rng-none \ +--disable-safe-stack \ +--disable-sanitizers \ +--disable-sdl \ +--disable-sdl-image \ +--disable-seccomp \ +--disable-slirp-smbd \ +--disable-smartcard \ +--disable-snappy \ +--disable-sparse \ +--disable-spice \ +--disable-strip \ +--disable-system \ +--disable-tcg \ +--disable-tools \ +--disable-tpm \ +--disable-u2f \ +--disable-usb-redir \ +--disable-user \ +--disable-vde \ +--disable-vdi \ +--disable-vhost-crypto \ +--disable-vhost-kernel \ +--disable-vhost-net \ +--disable-vhost-scsi \ +--disable-vhost-user \ +--disable-vhost-user-blk-server \ +--disable-vhost-vdpa \ +--disable-vhost-vsock \ +--disable-virglrenderer \ +--disable-virtfs \ +--disable-virtiofsd \ +--disable-vnc \ +--disable-vnc-jpeg \ +--disable-vnc-png \ +--disable-vnc-sasl \ +--disable-vte \ +--disable-vvfat \ +--disable-werror \ +--disable-whpx \ +--disable-xen \ +--disable-xen-pci-passthrough \ +--disable-xfsctl \ +--disable-xkbcommon \ +--disable-zstd \ +--enable-attr \ +--enable-avx2 \ +--enable-cap-ng \ +--enable-capstone \ +--enable-coroutine-pool \ +--enable-curl \ +--enable-debug-info \ +--enable-docs \ +--enable-fdt \ +--enable-gcrypt \ +--enable-glusterfs \ +--enable-gnutls \ +--enable-guest-agent \ +--enable-iconv \ +--enable-kvm \ +--enable-libiscsi \ +--enable-libpmem \ +--enable-libssh \ +--enable-libusb \ +--enable-libudev \ +--enable-linux-aio \ +--enable-lzo \ +--enable-malloc-trim \ +--enable-modules \ +--enable-mpath \ +--enable-numa \ +--enable-opengl \ +--enable-pie \ +--enable-rbd \ +--enable-rdma \ +--enable-seccomp \ +--enable-snappy \ +--enable-smartcard \ +--enable-spice \ +--enable-system \ +--enable-tcg \ +--enable-tools \ +--enable-tpm \ +--enable-trace-backend=dtrace \ +--enable-usb-redir \ +--enable-virtiofsd \ +--enable-vhost-kernel \ +--enable-vhost-net \ +--enable-vhost-user \ +--enable-vhost-user-blk-server \ +--enable-vhost-vdpa \ +--enable-vhost-vsock \ +--enable-vnc \ +--enable-vnc-png \ +--enable-vnc-sasl \ +--enable-werror \ +--enable-xkbcommon diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado new file mode 100755 index 0000000000..7aeecbcfb8 --- /dev/null +++ b/scripts/ci/org.centos/stream/8/x86_64/test-avocado @@ -0,0 +1,70 @@ +#!/bin/sh -e +# +# Runs a previously vetted list of tests, either marked explicitly for +# KVM and x86_64, or tests that are generic enough to be valid for all +# targets. Such a test list can be generated with: +# +# ./tests/venv/bin/avocado list --filter-by-tags-include-empty \ +# --filter-by-tags-include-empty-key -t accel:kvm,arch:x86_64 \ +# tests/avocado/ +# +# This is almost the complete list of avocado based tests available at +# the time this was compile, with the following exceptions: +# +# * Require machine type "x-remote": +# - tests/avocado/multiprocess.py:Multiprocess.test_multiprocess_x86_64 +# +# * Needs superuser privileges: +# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_virtiofsd_set_up +# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_launch_set_up +# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_launch_set_up +# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_mount_set_up +# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_two_runs +# +# * Requires display type "egl-headless": +# - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_virtio_vga_virgl +# - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_vhost_user_vga_virgl +# +# * Test is marked (unconditionally) to be skipped: +# - tests/avocado/virtio_check_params.py:VirtioMaxSegSettingsCheck.test_machine_types +# +make get-vm-images +./tests/venv/bin/avocado run \ + --job-results-dir=tests/results/ \ + tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_i440fx_kvm \ + tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_q35_kvm \ + tests/avocado/boot_linux_console.py:BootLinuxConsole.test_x86_64_pc \ + tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \ + tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \ + tests/avocado/hotplug_cpu.py:HotPlugCPU.test \ + tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \ + tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \ + tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_pt \ + tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict \ + tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict_cm \ + tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_exit_error_msg_with_linux_v3_6 \ + tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_work_with_linux_v4_16 \ + tests/avocado/migration.py:Migration.test_migration_with_exec \ + tests/avocado/migration.py:Migration.test_migration_with_tcp_localhost \ + tests/avocado/migration.py:Migration.test_migration_with_unix \ + tests/avocado/pc_cpu_hotplug_props.py:OmittedCPUProps.test_no_die_id \ + tests/avocado/replay_kernel.py:ReplayKernelNormal.test_x86_64_pc \ + tests/avocado/reverse_debugging.py:ReverseDebugging_X86_64.test_x86_64_pc \ + tests/avocado/version.py:Version.test_qmp_human_info_version \ + tests/avocado/virtio_version.py:VirtioVersionCheck.test_conventional_devs \ + tests/avocado/virtio_version.py:VirtioVersionCheck.test_modern_only_devs \ + tests/avocado/vnc.py:Vnc.test_change_password \ + tests/avocado/vnc.py:Vnc.test_change_password_requires_a_password \ + tests/avocado/vnc.py:Vnc.test_no_vnc \ + tests/avocado/vnc.py:Vnc.test_no_vnc_change_password \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_0 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_1 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_set_4_0 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_unset_4_1 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_4_0 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_set_4_0 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_4_0 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_unset_4_1 \ + tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_0_alias_compatibility \ + tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_1_alias \ + tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_none_alias diff --git a/scripts/ci/org.centos/stream/README b/scripts/ci/org.centos/stream/README new file mode 100644 index 0000000000..e3eadfe3ea --- /dev/null +++ b/scripts/ci/org.centos/stream/README @@ -0,0 +1,17 @@ +This directory contains scripts for generating a build of QEMU that +closely matches the CentOS Stream[1] builds of the qemu-kvm package. + +To have the environment ready to configure, build QEMU and run tests, +please start with a CentOS Stream machine and: + + * apply the generic "build-environment.yml" playbook located at + scripts/ci/setup + + * apply the "build-environment.yml" in the directory following the + CentOS Stream version (such as "8"). + +This currently only covers CentOS Stream 8 environments and +packages[2]. + +[1] https://www.centos.org/centos-stream/ +[2] https://git.centos.org/rpms/qemu-kvm/commits/c8s-stream-rhel diff --git a/scripts/ci/setup/build-environment.yml b/scripts/ci/setup/build-environment.yml index 581c1c75d1..599896cc5b 100644 --- a/scripts/ci/setup/build-environment.yml +++ b/scripts/ci/setup/build-environment.yml @@ -114,3 +114,41 @@ when: - ansible_facts['distribution'] == 'Ubuntu' - ansible_facts['distribution_version'] == '20.04' + + - name: Install basic packages to build QEMU on EL8 + dnf: + # This list of packages start with tests/docker/dockerfiles/centos8.docker + # but only include files that are common to all distro variants and present + # in the standard repos (no add-ons) + name: + - bzip2 + - bzip2-devel + - dbus-daemon + - diffutils + - gcc + - gcc-c++ + - genisoimage + - gettext + - git + - glib2-devel + - libaio-devel + - libepoxy-devel + - libgcrypt-devel + - lzo-devel + - make + - mesa-libEGL-devel + - nettle-devel + - nmap-ncat + - perl-Test-Harness + - pixman-devel + - python36 + - rdma-core-devel + - spice-glib-devel + - spice-server + - systemtap-sdt-devel + - tar + - zlib-devel + state: present + when: + - ansible_facts['distribution_file_variety'] == 'RedHat' + - ansible_facts['distribution_version'] == '8' From 60bec83e8a5c7cbaa47648f4b3370c8d53e48982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 15 Nov 2021 14:29:15 +0000 Subject: [PATCH 48/76] gitlab-ci: Split custom-runners.yml in one file per runner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To ease maintenance, add the custom-runners/ directory and split custom-runners.yml in 3 files, all included by the current custom-runners.yml: - ubuntu-18.04-s390x.yml - ubuntu-20.04-aarch64.yml - centos-stream-8-x86_64.yml Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: Alex Bennée Message-Id: <20211115095608.2436223-1-philmd@redhat.com> Reviewed-by: Willian Rampazzo Message-Id: <20211115142915.3797652-7-alex.bennee@linaro.org> --- .gitlab-ci.d/custom-runners.yml | 268 +----------------- .../custom-runners/centos-stream-8-x86_64.yml | 28 ++ .../custom-runners/ubuntu-18.04-s390x.yml | 118 ++++++++ .../custom-runners/ubuntu-20.04-aarch64.yml | 118 ++++++++ 4 files changed, 268 insertions(+), 264 deletions(-) create mode 100644 .gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml create mode 100644 .gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml create mode 100644 .gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml diff --git a/.gitlab-ci.d/custom-runners.yml b/.gitlab-ci.d/custom-runners.yml index 1f56297dfa..056c374619 100644 --- a/.gitlab-ci.d/custom-runners.yml +++ b/.gitlab-ci.d/custom-runners.yml @@ -13,267 +13,7 @@ variables: GIT_STRATEGY: clone -# All ubuntu-18.04 jobs should run successfully in an environment -# setup by the scripts/ci/setup/build-environment.yml task -# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" -ubuntu-18.04-s390x-all-linux-static: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - - if: "$S390X_RUNNER_AVAILABLE" - script: - # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 - # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages - - mkdir build - - cd build - - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - - make --output-sync -j`nproc` check-tcg V=1 - -ubuntu-18.04-s390x-all: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - - if: "$S390X_RUNNER_AVAILABLE" - script: - - mkdir build - - cd build - - ../configure --disable-libssh - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-18.04-s390x-alldbg: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$S390X_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --enable-debug --disable-libssh - - make clean - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-18.04-s390x-clang: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$S390X_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-18.04-s390x-tci: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$S390X_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --enable-tcg-interpreter - - make --output-sync -j`nproc` - -ubuntu-18.04-s390x-notcg: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$S390X_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --disable-tcg - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -# All ubuntu-20.04 jobs should run successfully in an environment -# setup by the scripts/ci/setup/qemu/build-environment.yml task -# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" -ubuntu-20.04-aarch64-all-linux-static: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - - if: "$AARCH64_RUNNER_AVAILABLE" - script: - # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 - # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages - - mkdir build - - cd build - - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - - make --output-sync -j`nproc` check-tcg V=1 - -ubuntu-20.04-aarch64-all: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$AARCH64_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-20.04-aarch64-alldbg: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - - if: "$AARCH64_RUNNER_AVAILABLE" - script: - - mkdir build - - cd build - - ../configure --enable-debug --disable-libssh - - make clean - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-20.04-aarch64-clang: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$AARCH64_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --cc=clang-10 --cxx=clang++-10 --enable-sanitizers - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-20.04-aarch64-tci: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$AARCH64_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --enable-tcg-interpreter - - make --output-sync -j`nproc` - -ubuntu-20.04-aarch64-notcg: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$AARCH64_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --disable-tcg - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -centos-stream-8-x86_64: - allow_failure: true - needs: [] - stage: build - tags: - - centos_stream_8 - - x86_64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE" - artifacts: - name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" - when: on_failure - expire_in: 7 days - paths: - - build/tests/results/latest/results.xml - - build/tests/results/latest/test-results - reports: - junit: build/tests/results/latest/results.xml - before_script: - - JOBS=$(expr $(nproc) + 1) - script: - - mkdir build - - cd build - - ../scripts/ci/org.centos/stream/8/x86_64/configure - - make -j"$JOBS" - - make NINJA=":" check - - ../scripts/ci/org.centos/stream/8/x86_64/test-avocado +include: + - local: '/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml' + - local: '/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml' + - local: '/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml' diff --git a/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml new file mode 100644 index 0000000000..49aa703f55 --- /dev/null +++ b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml @@ -0,0 +1,28 @@ +centos-stream-8-x86_64: + allow_failure: true + needs: [] + stage: build + tags: + - centos_stream_8 + - x86_64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE" + artifacts: + name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + when: on_failure + expire_in: 7 days + paths: + - build/tests/results/latest/results.xml + - build/tests/results/latest/test-results + reports: + junit: build/tests/results/latest/results.xml + before_script: + - JOBS=$(expr $(nproc) + 1) + script: + - mkdir build + - cd build + - ../scripts/ci/org.centos/stream/8/x86_64/configure + - make -j"$JOBS" + - make NINJA=":" check + - ../scripts/ci/org.centos/stream/8/x86_64/test-avocado diff --git a/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml new file mode 100644 index 0000000000..f39d874a1e --- /dev/null +++ b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml @@ -0,0 +1,118 @@ +# All ubuntu-18.04 jobs should run successfully in an environment +# setup by the scripts/ci/setup/build-environment.yml task +# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" + +ubuntu-18.04-s390x-all-linux-static: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$S390X_RUNNER_AVAILABLE" + script: + # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 + # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages + - mkdir build + - cd build + - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + - make --output-sync -j`nproc` check-tcg V=1 + +ubuntu-18.04-s390x-all: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$S390X_RUNNER_AVAILABLE" + script: + - mkdir build + - cd build + - ../configure --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-alldbg: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --enable-debug --disable-libssh + - make clean + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-clang: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-tci: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --enable-tcg-interpreter + - make --output-sync -j`nproc` + +ubuntu-18.04-s390x-notcg: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --disable-tcg + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 diff --git a/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml new file mode 100644 index 0000000000..920e388bd0 --- /dev/null +++ b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml @@ -0,0 +1,118 @@ +# All ubuntu-20.04 jobs should run successfully in an environment +# setup by the scripts/ci/setup/qemu/build-environment.yml task +# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" + +ubuntu-20.04-aarch64-all-linux-static: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$AARCH64_RUNNER_AVAILABLE" + script: + # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 + # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages + - mkdir build + - cd build + - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + - make --output-sync -j`nproc` check-tcg V=1 + +ubuntu-20.04-aarch64-all: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-20.04-aarch64-alldbg: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$AARCH64_RUNNER_AVAILABLE" + script: + - mkdir build + - cd build + - ../configure --enable-debug --disable-libssh + - make clean + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-20.04-aarch64-clang: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --cc=clang-10 --cxx=clang++-10 --enable-sanitizers + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-20.04-aarch64-tci: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --enable-tcg-interpreter + - make --output-sync -j`nproc` + +ubuntu-20.04-aarch64-notcg: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --disable-tcg + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 From 9968de0a4a5470bd7b98dcd2fae5d5269908f16b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Tue, 16 Nov 2021 11:27:57 +0000 Subject: [PATCH 49/76] gitlab: skip cirrus jobs on master and stable branches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the primary QEMU repository we want the CI jobs to run on the staging branch as a gating CI test. Cirrus CI has very limited job concurrency, so if there are too many jobs triggered they'll queue up and hit the GitLab CI job timeout before they complete on Cirrus. If we let Cirrus jobs run again on the master branch immediately after merging from staging, that just increases the chances jobs will get queued and subsequently timeout. The same applies for merges to the stable branches. User forks meanwhile should be allowed to run Cirrus CI jobs freely. Signed-off-by: Daniel P. Berrangé Reviewed-by: Richard Henderson Reviewed-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Willian Rampazzo Message-Id: <20211116112757.1909176-1-berrange@redhat.com> Signed-off-by: Alex Bennée --- .gitlab-ci.d/cirrus.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml index e7b25e7427..cc2f2e8906 100644 --- a/.gitlab-ci.d/cirrus.yml +++ b/.gitlab-ci.d/cirrus.yml @@ -40,6 +40,9 @@ - cat .gitlab-ci.d/cirrus/$NAME.yml - cirrus-run -v --show-build-log always .gitlab-ci.d/cirrus/$NAME.yml rules: + # Allow on 'staging' branch and 'stable-X.Y-staging' branches only + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH !~ /staging/' + when: never - if: "$CIRRUS_GITHUB_REPO && $CIRRUS_API_TOKEN" x64-freebsd-12-build: From f26bd6ff21df2f1d155ca17eef360423b706ab7f Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 11 Nov 2021 09:37:15 -0500 Subject: [PATCH 50/76] python/aqmp: Fix disconnect during capabilities negotiation If we receive ConnectionResetError (ECONNRESET) while attempting to perform capabilities negotiation -- prior to the establishment of the async reader/writer tasks -- the disconnect function is not aware that we are in an error pathway. As a result, when attempting to close the StreamWriter, we'll see the same ConnectionResetError that caused us to initiate a disconnect in the first place, which will cause the disconnect task itself to fail, which emits a CRITICAL logging event. I still don't know if there's a smarter way to check to see if an exception received at this point is "the same" exception as the one that caused the initial disconnect, but for now the problem can be avoided by improving the error pathway detection in the exit path. Reported-by: Thomas Huth Signed-off-by: John Snow Tested-by: Thomas Huth Message-id: 20211111143719.2162525-2-jsnow@redhat.com Signed-off-by: John Snow --- python/qemu/aqmp/protocol.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/python/qemu/aqmp/protocol.py b/python/qemu/aqmp/protocol.py index ae1df24026..860b79512d 100644 --- a/python/qemu/aqmp/protocol.py +++ b/python/qemu/aqmp/protocol.py @@ -623,13 +623,21 @@ class AsyncProtocol(Generic[T]): def _done(task: Optional['asyncio.Future[Any]']) -> bool: return task is not None and task.done() - # NB: We can't rely on _bh_tasks being done() here, it may not - # yet have had a chance to run and gather itself. + # Are we already in an error pathway? If either of the tasks are + # already done, or if we have no tasks but a reader/writer; we + # must be. + # + # NB: We can't use _bh_tasks to check for premature task + # completion, because it may not yet have had a chance to run + # and gather itself. tasks = tuple(filter(None, (self._writer_task, self._reader_task))) error_pathway = _done(self._reader_task) or _done(self._writer_task) + if not tasks: + error_pathway |= bool(self._reader) or bool(self._writer) try: - # Try to flush the writer, if possible: + # Try to flush the writer, if possible. + # This *may* cause an error and force us over into the error path. if not error_pathway: await self._bh_flush_writer() except BaseException as err: @@ -639,7 +647,7 @@ class AsyncProtocol(Generic[T]): self.logger.debug("%s:\n%s\n", emsg, pretty_traceback()) raise finally: - # Cancel any still-running tasks: + # Cancel any still-running tasks (Won't raise): if self._writer_task is not None and not self._writer_task.done(): self.logger.debug("Cancelling writer task.") self._writer_task.cancel() @@ -652,7 +660,7 @@ class AsyncProtocol(Generic[T]): self.logger.debug("Waiting for tasks to complete ...") await asyncio.wait(tasks) - # Lastly, close the stream itself. (May raise): + # Lastly, close the stream itself. (*May raise*!): await self._bh_close_stream(error_pathway) self.logger.debug("Disconnected.") From 25de7f50121f251c45d111582d786db7ce0768d3 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 11 Nov 2021 09:37:16 -0500 Subject: [PATCH 51/76] python/aqmp: fix ConnectError string method When ConnectError is used to wrap an Exception that was initialized without an error message, we are treated to a traceback with a rubbish line like this: ... ConnectError: Failed to establish session: Correct this to use the name of an exception as a fallback message: ... ConnectError: Failed to establish session: EOFError Better! Signed-off-by: John Snow Reported-by: Thomas Huth Tested-by: Thomas Huth Message-id: 20211111143719.2162525-3-jsnow@redhat.com Signed-off-by: John Snow --- python/qemu/aqmp/protocol.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/qemu/aqmp/protocol.py b/python/qemu/aqmp/protocol.py index 860b79512d..5190b33b13 100644 --- a/python/qemu/aqmp/protocol.py +++ b/python/qemu/aqmp/protocol.py @@ -79,7 +79,11 @@ class ConnectError(AQMPError): self.exc: Exception = exc def __str__(self) -> str: - return f"{self.error_message}: {self.exc!s}" + cause = str(self.exc) + if not cause: + # If there's no error string, use the exception name. + cause = exception_summary(self.exc) + return f"{self.error_message}: {cause}" class StateError(AQMPError): From 47b43acd57e6e0b2910683f00a758adbc418dde6 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 11 Nov 2021 09:37:17 -0500 Subject: [PATCH 52/76] scripts/device-crash-test: simplify Exception handling We don't need to handle KeyboardInterruptError specifically; we can instead tighten the scope of the broad Exception handlers to only catch "Exception", which has the effect of allowing all BaseException classes that do not inherit from Exception to be raised through. KeyboardInterruptError and a few other important ones are BaseExceptions, so this does the same thing with less code. Signed-off-by: John Snow Reported-by: Thomas Huth Tested-by: Thomas Huth Message-id: 20211111143719.2162525-4-jsnow@redhat.com Signed-off-by: John Snow --- scripts/device-crash-test | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/device-crash-test b/scripts/device-crash-test index 8331c057b8..d91e8616ef 100755 --- a/scripts/device-crash-test +++ b/scripts/device-crash-test @@ -317,9 +317,7 @@ class QemuBinaryInfo(object): try: vm.launch() mi['runnable'] = True - except KeyboardInterrupt: - raise - except: + except Exception: dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info()) dbg("log: %r", vm.get_log()) mi['runnable'] = False @@ -360,9 +358,7 @@ def checkOneCase(args, testcase): exc_traceback = None try: vm.launch() - except KeyboardInterrupt: - raise - except: + except Exception: exc_traceback = traceback.format_exc() dbg("Exception while running test case") finally: From 76f86e78b255d17aad2ebd1177069c86f08039ef Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 11 Nov 2021 09:37:18 -0500 Subject: [PATCH 53/76] scripts/device-crash-test: don't emit AQMP connection errors to stdout These errors are expected, so they shouldn't clog up terminal output. In the event that they're *not* expected, we'll be seeing an awful lot more output concerning the nature of the failure. Reported-by: Thomas Huth Signed-off-by: John Snow Tested-by: Thomas Huth Message-id: 20211111143719.2162525-5-jsnow@redhat.com Signed-off-by: John Snow --- scripts/device-crash-test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/device-crash-test b/scripts/device-crash-test index d91e8616ef..49bcd61b4f 100755 --- a/scripts/device-crash-test +++ b/scripts/device-crash-test @@ -499,6 +499,12 @@ def main(): lvl = logging.WARN logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s') + if not args.debug: + # Async QMP, when in use, is chatty about connection failures. + # This script knowingly generates a ton of connection errors. + # Silence this logger. + logging.getLogger('qemu.aqmp.qmp_client').setLevel(logging.CRITICAL) + fatal_failures = [] wl_stats = {} skipped = 0 From c398a241ec4138e0b995a0215dea84ca93b0384f Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 11 Nov 2021 09:37:19 -0500 Subject: [PATCH 54/76] scripts/device-crash-test: hide tracebacks for QMP connect errors Generally, the traceback for a connection failure is uninteresting and all we need to know is that the connection attempt failed. Reduce the verbosity in these cases, except when debugging. Signed-off-by: John Snow Reported-by: Thomas Huth Tested-by: Thomas Huth Message-id: 20211111143719.2162525-6-jsnow@redhat.com Signed-off-by: John Snow --- scripts/device-crash-test | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/scripts/device-crash-test b/scripts/device-crash-test index 49bcd61b4f..3db0ffe5b8 100755 --- a/scripts/device-crash-test +++ b/scripts/device-crash-test @@ -36,6 +36,7 @@ from itertools import chain sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python')) from qemu.machine import QEMUMachine +from qemu.aqmp import ConnectError logger = logging.getLogger('device-crash-test') dbg = logger.debug @@ -355,10 +356,12 @@ def checkOneCase(args, testcase): dbg("will launch QEMU: %s", cmdline) vm = QEMUMachine(binary=binary, args=args) + exc = None exc_traceback = None try: vm.launch() - except Exception: + except Exception as this_exc: + exc = this_exc exc_traceback = traceback.format_exc() dbg("Exception while running test case") finally: @@ -366,8 +369,9 @@ def checkOneCase(args, testcase): ec = vm.exitcode() log = vm.get_log() - if exc_traceback is not None or ec != 0: - return {'exc_traceback':exc_traceback, + if exc is not None or ec != 0: + return {'exc': exc, + 'exc_traceback':exc_traceback, 'exitcode':ec, 'log':log, 'testcase':testcase, @@ -455,6 +459,17 @@ def logFailure(f, level): for l in f['log'].strip().split('\n'): logger.log(level, "log: %s", l) logger.log(level, "exit code: %r", f['exitcode']) + + # If the Exception is merely a QMP connect error, + # reduce the logging level for its traceback to + # improve visual clarity. + if isinstance(f.get('exc'), ConnectError): + logger.log(level, "%s.%s: %s", + type(f['exc']).__module__, + type(f['exc']).__qualname__, + str(f['exc'])) + level = logging.DEBUG + if f['exc_traceback']: logger.log(level, "exception:") for l in f['exc_traceback'].split('\n'): From 67f9968ce3f0847ffddb6ee2837a3641acd92abf Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 16 Nov 2021 21:07:31 +0100 Subject: [PATCH 55/76] Update version for v6.2.0-rc1 release Signed-off-by: Richard Henderson --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 8de48381d2..61d04b724a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.1.90 +6.2.91 From fe644e8ebb8534ccc2f77da27cdc7911132f0bd1 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 15 Oct 2021 14:42:19 +0200 Subject: [PATCH 56/76] target/s390x/cpu.h: Remove unused SIGP_MODE defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are unused since commit 075e52b816648f21 ("s390x/cpumodel: we are always in zarchitecture mode") and it's unlikely that we will ever need them again. So let's simply remove them now. Message-Id: <20211015124219.1330830-1-thuth@redhat.com> Reviewed-by: David Hildenbrand Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Huth --- target/s390x/cpu.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 3153d053e9..ca3845d023 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -674,11 +674,6 @@ QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096); #define SIGP_STAT_INVALID_ORDER 0x00000002UL #define SIGP_STAT_RECEIVER_CHECK 0x00000001UL -/* SIGP SET ARCHITECTURE modes */ -#define SIGP_MODE_ESA_S390 0 -#define SIGP_MODE_Z_ARCH_TRANS_ALL_PSW 1 -#define SIGP_MODE_Z_ARCH_TRANS_CUR_PSW 2 - /* SIGP order code mask corresponding to bit positions 56-63 */ #define SIGP_ORDER_MASK 0x000000ff From 0c8c45140c8494a3b6fd36946e437dacac2573b8 Mon Sep 17 00:00:00 2001 From: Kashyap Chamarthy Date: Wed, 10 Nov 2021 15:49:00 +0100 Subject: [PATCH 57/76] docs: rSTify the "TrivialPatches" wiki MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original wiki is here[1]. I converted by copying the wiki source into a .wiki file and convert to rST using `pandoc`: $ pandoc -f Mediawiki -t rst trivial-patches.wiki -o trivial-patches.rst Update the active maintainer names (and drop Michael Tokarev's inactive repo) to reflect current reality. [1] https://wiki.qemu.org/Contribute/TrivialPatches Signed-off-by: Kashyap Chamarthy Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20211110144902.388183-2-kchamart@redhat.com> Signed-off-by: Thomas Huth --- docs/devel/index.rst | 1 + docs/devel/trivial-patches.rst | 50 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 docs/devel/trivial-patches.rst diff --git a/docs/devel/index.rst b/docs/devel/index.rst index 7c25177c5d..dbde1c44e9 100644 --- a/docs/devel/index.rst +++ b/docs/devel/index.rst @@ -45,3 +45,4 @@ modifying QEMU's source code. vfio-migration qapi-code-gen writing-monitor-commands + trivial-patches diff --git a/docs/devel/trivial-patches.rst b/docs/devel/trivial-patches.rst new file mode 100644 index 0000000000..db3f2001da --- /dev/null +++ b/docs/devel/trivial-patches.rst @@ -0,0 +1,50 @@ +Trivial Patches +=============== + +Overview +-------- + +Trivial patches that change just a few lines of code sometimes languish +on the mailing list even though they require only a small amount of +review. This is often the case for patches that do not fall under an +actively maintained subsystem and therefore fall through the cracks. + +The trivial patches team take on the task of reviewing and building pull +requests for patches that: + +- Do not fall under an actively maintained subsystem. +- Are single patches or short series (max 2-4 patches). +- Only touch a few lines of code. + +**You should hint that your patch is a candidate by CCing +qemu-trivial@nongnu.org.** + +Repositories +------------ + +Since the trivial patch team rotates maintainership there is only one +active repository at a time: + +- git://github.com/vivier/qemu.git trivial-patches - `browse `__ + +Workflow +-------- + +The trivial patches team rotates the duty of collecting trivial patches +amongst its members. A team member's job is to: + +1. Identify trivial patches on the development mailing list. +2. Review trivial patches, merge them into a git tree, and reply to state + that the patch is queued. +3. Send pull requests to the development mailing list once a week. + +A single team member can be on duty as long as they like. The suggested +time is 1 week before handing off to the next member. + +Team +---- + +If you would like to join the trivial patches team, contact Laurent +Vivier. The current team includes: + +- `Laurent Vivier `__ From 0ff0dcf6b5292e044985c38cbd83a57485ca887c Mon Sep 17 00:00:00 2001 From: Kashyap Chamarthy Date: Wed, 10 Nov 2021 15:49:01 +0100 Subject: [PATCH 58/76] docs: rSTify the "SubmitAPullRequest" wiki MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original wiki is here[1]. I converted by copying the wiki source into a .wiki file and convert to rST using `pandoc`: $ pandoc -f Mediawiki -t rst submitting-a-pull-request.wiki \ -o submitting-a-pull-request.rst This is a 1-1 conversion; no content changes. [1] https://wiki.qemu.org/Contribute/SubmitAPullRequest Signed-off-by: Kashyap Chamarthy Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20211110144902.388183-3-kchamart@redhat.com> Signed-off-by: Thomas Huth --- docs/devel/index.rst | 1 + docs/devel/submitting-a-pull-request.rst | 76 ++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 docs/devel/submitting-a-pull-request.rst diff --git a/docs/devel/index.rst b/docs/devel/index.rst index dbde1c44e9..211f8876da 100644 --- a/docs/devel/index.rst +++ b/docs/devel/index.rst @@ -46,3 +46,4 @@ modifying QEMU's source code. qapi-code-gen writing-monitor-commands trivial-patches + submitting-a-pull-request diff --git a/docs/devel/submitting-a-pull-request.rst b/docs/devel/submitting-a-pull-request.rst new file mode 100644 index 0000000000..8729d29036 --- /dev/null +++ b/docs/devel/submitting-a-pull-request.rst @@ -0,0 +1,76 @@ +Submit a Pull Request +===================== + +QEMU welcomes contributions of code, but we generally expect these to be +sent as simple patch emails to the mailing list (see our page on +`submitting a patch +`__ +for more details). Generally only existing submaintainers of a tree +will need to submit pull requests, although occasionally for a large +patch series we might ask a submitter to send a pull request. This page +documents our recommendations on pull requests for those people. + +A good rule of thumb is not to send a pull request unless somebody asks +you to. + +**Resend the patches with the pull request** as emails which are +threaded as follow-ups to the pull request itself. The simplest way to +do this is to use ``git format-patch --cover-letter`` to create the +emails, and then edit the cover letter to include the pull request +details that ``git request-pull`` outputs. + +**Use PULL as the subject line tag** in both the cover letter and the +retransmitted patch mails (for example, by using +``--subject-prefix=PULL`` in your ``git format-patch`` command). This +helps people to filter in or out the resulting emails (especially useful +if they are only CC'd on one email out of the set). + +**Each patch must have your own Signed-off-by: line** as well as that of +the original author if the patch was not written by you. This is because +with a pull request you're now indicating that the patch has passed via +you rather than directly from the original author. + +**Don't forget to add Reviewed-by: and Acked-by: lines**. When other +people have reviewed the patches you're putting in the pull request, +make sure you've copied their signoffs across. (If you use the `patches +tool `__ to add patches from email +directly to your git repo it will include the tags automatically; if +you're updating patches manually or in some other way you'll need to +edit the commit messages by hand.) + +**Don't send pull requests for code that hasn't passed review**. A pull +request says these patches are ready to go into QEMU now, so they must +have passed the standard code review processes. In particular if you've +corrected issues in one round of code review, you need to send your +fixed patch series as normal to the list; you can't put it in a pull +request until it's gone through. (Extremely trivial fixes may be OK to +just fix in passing, but if in doubt err on the side of not.) + +**Test before sending**. This is an obvious thing to say, but make sure +everything builds (including that it compiles at each step of the patch +series) and that "make check" passes before sending out the pull +request. As a submaintainer you're one of QEMU's lines of defense +against bad code, so double check the details. + +**All pull requests must be signed**. If your key is not already signed +by members of the QEMU community, you should make arrangements to attend +a `KeySigningParty `__ (for +example at KVM Forum) or make alternative arrangements to have your key +signed by an attendee. Key signing requires meeting another community +member \*in person\* so please make appropriate arrangements. By +"signed" here we mean that the pullreq email should quote a tag which is +a GPG-signed tag (as created with 'gpg tag -s ...'). + +**Pull requests not for master should say "not for master" and have +"PULL SUBSYSTEM whatever" in the subject tag**. If your pull request is +targeting a stable branch or some submaintainer tree, please include the +string "not for master" in the cover letter email, and make sure the +subject tag is "PULL SUBSYSTEM s390/block/whatever" rather than just +"PULL". This allows it to be automatically filtered out of the set of +pull requests that should be applied to master. + +You might be interested in the `make-pullreq +`__ +script which automates some of this process for you and includes a few +sanity checks. Note that you must edit it to configure it suitably for +your local situation! From 9f73de8df0335c9387f4ee39e207a65a1615676f Mon Sep 17 00:00:00 2001 From: Kashyap Chamarthy Date: Wed, 10 Nov 2021 15:49:02 +0100 Subject: [PATCH 59/76] docs: rSTify the "SubmitAPatch" wiki MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - The original wiki is here[1]. I copied the wiki source[2] into a .wiki file, and used `pandoc` to convert it to rST: $> pandoc -f Mediawiki -t rst submitting-a-patch.wiki -o submitting-a-patch.rst - The only minor touch-ups I did was to fix URLs. But 99%, it is a 1-1 conversion. (An example of a "touch-up": under the section "Patch emails must include a Signed-off-by: line", I updated the "see SubmittingPatches 1.12" to "1.12) Sign your work") - I have also converted a couple other related wiki pages (included in this patch series) that were hyperlinked within the SubmitAPatch page, or a page that it refers to: - SubmitAPullRequest: https://wiki.qemu.org/Contribute/SubmitAPullRequest - TrivialPatches: https://wiki.qemu.org/Contribute/TrivialPatches - Over time, many people contributed to this wiki page; you can find all the authors in the wiki history[3]. [1] https://wiki.qemu.org/Contribute/SubmitAPatch [2] http://wiki.qemu.org/index.php?title=Contribute/SubmitAPatch&action=edit [3] http://wiki.qemu.org/index.php?title=Contribute/SubmitAPatch&action=history Signed-off-by: Kashyap Chamarthy Message-Id: <20211110144902.388183-4-kchamart@redhat.com> Reviewed-by: Philippe Mathieu-Daudé [thuth: Cosmetic fixes] Signed-off-by: Thomas Huth --- docs/devel/index.rst | 1 + docs/devel/submitting-a-patch.rst | 456 ++++++++++++++++++++++++++++++ 2 files changed, 457 insertions(+) create mode 100644 docs/devel/submitting-a-patch.rst diff --git a/docs/devel/index.rst b/docs/devel/index.rst index 211f8876da..afd937535e 100644 --- a/docs/devel/index.rst +++ b/docs/devel/index.rst @@ -46,4 +46,5 @@ modifying QEMU's source code. qapi-code-gen writing-monitor-commands trivial-patches + submitting-a-patch submitting-a-pull-request diff --git a/docs/devel/submitting-a-patch.rst b/docs/devel/submitting-a-patch.rst new file mode 100644 index 0000000000..6fefc67d52 --- /dev/null +++ b/docs/devel/submitting-a-patch.rst @@ -0,0 +1,456 @@ +Submitting a Patch +================== + +QEMU welcomes contributions of code (either fixing bugs or adding new +functionality). However, we get a lot of patches, and so we have some +guidelines about submitting patches. If you follow these, you'll help +make our task of code review easier and your patch is likely to be +committed faster. + +This page seems very long, so if you are only trying to post a quick +one-shot fix, the bare minimum we ask is that: + +- You **must** provide a Signed-off-by: line (this is a hard + requirement because it's how you say "I'm legally okay to contribute + this and happy for it to go into QEMU", modeled after the `Linux kernel + `__ + policy.) ``git commit -s`` or ``git format-patch -s`` will add one. +- All contributions to QEMU must be **sent as patches** to the + qemu-devel `mailing list `__. Patch contributions + should not be posted on the bug tracker, posted on forums, or + externally hosted and linked to. (We have other mailing lists too, + but all patches must go to qemu-devel, possibly with a Cc: to another + list.) ``git send-email`` works best for delivering the patch without + mangling it (`hints for setting it + up `__), + but attachments can be used as a last resort on a first-time + submission. +- You must read replies to your message, and be willing to act on them. + Note, however, that maintainers are often willing to manually fix up + first-time contributions, since there is a learning curve involved in + making an ideal patch submission. + +You do not have to subscribe to post (list policy is to reply-to-all to +preserve CCs and keep non-subscribers in the loop on the threads they +start), although you may find it easier as a subscriber to pick up good +ideas from other posts. If you do subscribe, be prepared for a high +volume of email, often over one thousand messages in a week. The list is +moderated; first-time posts from an email address (whether or not you +subscribed) may be subject to some delay while waiting for a moderator +to whitelist your address. + +The larger your contribution is, or if you plan on becoming a long-term +contributor, then the more important the rest of this page becomes. +Reading the table of contents below should already give you an idea of +the basic requirements. Use the table of contents as a reference, and +read the parts that you have doubts about. + +.. _writing_your_patches: + +Writing your Patches +-------------------- + +.. _use_the_qemu_coding_style: + +Use the QEMU coding style +~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can run run *scripts/checkpatch.pl * before submitting to +check that you are in compliance with our coding standards. Be aware +that ``checkpatch.pl`` is not infallible, though, especially where C +preprocessor macros are involved; use some common sense too. See also: + +- `QEMU Coding Style + `__ + +- `Automate a checkpatch run on + commit `__ + +.. _base_patches_against_current_git_master: + +Base patches against current git master +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There's no point submitting a patch which is based on a released version +of QEMU because development will have moved on from then and it probably +won't even apply to master. We only apply selected bugfixes to release +branches and then only as backports once the code has gone into master. + +.. _split_up_long_patches: + +Split up long patches +~~~~~~~~~~~~~~~~~~~~~ + +Split up longer patches into a patch series of logical code changes. +Each change should compile and execute successfully. For instance, don't +add a file to the makefile in patch one and then add the file itself in +patch two. (This rule is here so that people can later use tools like +`git bisect `__ without hitting +points in the commit history where QEMU doesn't work for reasons +unrelated to the bug they're chasing.) Put documentation first, not +last, so that someone reading the series can do a clean-room evaluation +of the documentation, then validate that the code matched the +documentation. A commit message that mentions "Also, ..." is often a +good candidate for splitting into multiple patches. For more thoughts on +properly splitting patches and writing good commit messages, see `this +advice from +OpenStack `__. + +.. _make_code_motion_patches_easy_to_review: + +Make code motion patches easy to review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a series requires large blocks of code motion, there are tricks for +making the refactoring easier to review. Split up the series so that +semantic changes (or even function renames) are done in a separate patch +from the raw code motion. Use a one-time setup of +``git config diff.renames true; git config diff.algorithm patience`` +(Refer to `git-config `__.) The +``diff.renames`` property ensures file rename patches will be given in a +more compact representation that focuses only on the differences across +the file rename, instead of showing the entire old file as a deletion +and the new file as an insertion. Meanwhile, the 'diff.algorithm' +property ensures that extracting a non-contiguous subset of one file +into a new file, but where all extracted parts occur in the same order +both before and after the patch, will reduce churn in trying to treat +unrelated ``}`` lines in the original file as separating hunks of +changes. + +Ideally, a code motion patch can be reviewed by doing:: + + git format-patch --stdout -1 > patch; + diff -u <(sed -n 's/^-//p' patch) <(sed -n 's/^\+//p' patch) + +to focus on the few changes that weren't wholesale code motion. + +.. _dont_include_irrelevant_changes: + +Don't include irrelevant changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In particular, don't include formatting, coding style or whitespace +changes to bits of code that would otherwise not be touched by the +patch. (It's OK to fix coding style issues in the immediate area (few +lines) of the lines you're changing.) If you think a section of code +really does need a reindent or other large-scale style fix, submit this +as a separate patch which makes no semantic changes; don't put it in the +same patch as your bug fix. + +For smaller patches in less frequently changed areas of QEMU, consider +using the `trivial patches process +`__. + +.. _write_a_meaningful_commit_message: + +Write a meaningful commit message +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Commit messages should be meaningful and should stand on their own as a +historical record of why the changes you applied were necessary or +useful. + +QEMU follows the usual standard for git commit messages: the first line +(which becomes the email subject line) is "subsystem: single line +summary of change". Whether the "single line summary of change" starts +with a capital is a matter of taste, but we prefer that the summary does +not end in ".". Look at ``git shortlog -30`` for an idea of sample +subject lines. Then there is a blank line and a more detailed +description of the patch, another blank and your Signed-off-by: line. +Please do not use lines that are longer than 76 characters in your +commit message (so that the text still shows up nicely with "git show" +in a 80-columns terminal window). + +The body of the commit message is a good place to document why your +change is important. Don't include comments like "This is a suggestion +for fixing this bug" (they can go below the ``---`` line in the email so +they don't go into the final commit message). Make sure the body of the +commit message can be read in isolation even if the reader's mailer +displays the subject line some distance apart (that is, a body that +starts with "... so that" as a continuation of the subject line is +harder to follow). + +.. _submitting_your_patches: + +Submitting your Patches +----------------------- + +.. _cc_the_relevant_maintainer: + +CC the relevant maintainer +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send patches both to the mailing list and CC the maintainer(s) of the +files you are modifying. look in the MAINTAINERS file to find out who +that is. Also try using scripts/get_maintainer.pl from the repository +for learning the most common committers for the files you touched. + +Example:: + + ~/src/qemu/scripts/get_maintainer.pl -f hw/ide/core.c + +In fact, you can automate this, via a one-time setup of ``git config +sendemail.cccmd 'scripts/get_maintainer.pl --nogit-fallback'`` (Refer to +`git-config `__.) + +.. _do_not_send_as_an_attachment: + +Do not send as an attachment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send patches inline so they are easy to reply to with review comments. +Do not put patches in attachments. + +.. _use_git_format_patch: + +Use ``git format-patch`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +Use the right diff format. +`git format-patch `__ will +produce patch emails in the right format (check the documentation to +find out how to drive it). You can then edit the cover letter before +using ``git send-email`` to mail the files to the mailing list. (We +recommend `git send-email `__ +because mail clients often mangle patches by wrapping long lines or +messing up whitespace. Some distributions do not include send-email in a +default install of git; you may need to download additional packages, +such as 'git-email' on Fedora-based systems.) Patch series need a cover +letter, with shallow threading (all patches in the series are +in-reply-to the cover letter, but not to each other); single unrelated +patches do not need a cover letter (but if you do send a cover letter, +use --numbered so the cover and the patch have distinct subject lines). +Patches are easier to find if they start a new top-level thread, rather +than being buried in-reply-to another existing thread. + +.. _patch_emails_must_include_a_signed_off_by_line: + +Patch emails must include a ``Signed-off-by:`` line +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For more information see `1.12) Sign your work +`__. +This is vital or we will not be able to apply your patch! Please use +your real name to sign a patch (not an alias or acronym). + +If you wrote the patch, make sure your "From:" and "Signed-off-by:" +lines use the same spelling. It's okay if you subscribe or contribute to +the list via more than one address, but using multiple addresses in one +commit just confuses things. If someone else wrote the patch, git will +include a "From:" line in the body of the email (different from your +envelope From:) that will give credit to the correct author; but again, +that author's Signed-off-by: line is mandatory, with the same spelling. + +.. _include_a_meaningful_cover_letter: + +Include a meaningful cover letter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This usually applies only to a series that includes multiple patches; +the cover letter explains the overall goal of such a series. + +When reviewers don't know your goal at the start of their review, they +may object to early changes that don't make sense until the end of the +series, because they do not have enough context yet at that point of +their review. A series where the goal is unclear also risks a higher +number of review-fix cycles because the reviewers haven't bought into +the idea yet. If the cover letter can explain these points to the +reviewer, the process will be smoother patches will get merged faster. +Make sure your cover letter includes a diffstat of changes made over the +entire series; potential reviewers know what files they are interested +in, and they need an easy way determine if your series touches them. + +.. _use_the_rfc_tag_if_needed: + +Use the RFC tag if needed +~~~~~~~~~~~~~~~~~~~~~~~~~ + +For example, "[PATCH RFC v2]". ``git format-patch --subject-prefix=RFC`` +can help. + +"RFC" means "Request For Comments" and is a statement that you don't +intend for your patchset to be applied to master, but would like some +review on it anyway. Reasons for doing this include: + +- the patch depends on some pending kernel changes which haven't yet + been accepted, so the QEMU patch series is blocked until that + dependency has been dealt with, but is worth reviewing anyway +- the patch set is not finished yet (perhaps it doesn't cover all use + cases or work with all targets) but you want early review of a major + API change or design structure before continuing + +In general, since it's asking other people to do review work on a +patchset that the submitter themselves is saying shouldn't be applied, +it's best to: + +- use it sparingly +- in the cover letter, be clear about why a patch is an RFC, what areas + of the patchset you're looking for review on, and why reviewers + should care + +.. _participating_in_code_review: + +Participating in Code Review +---------------------------- + +All patches submitted to the QEMU project go through a code review +process before they are accepted. Some areas of code that are well +maintained may review patches quickly, lesser-loved areas of code may +have a longer delay. + +.. _stay_around_to_fix_problems_raised_in_code_review: + +Stay around to fix problems raised in code review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Not many patches get into QEMU straight away -- it is quite common that +developers will identify bugs, or suggest a cleaner approach, or even +just point out code style issues or commit message typos. You'll need to +respond to these, and then send a second version of your patches with +the issues fixed. This takes a little time and effort on your part, but +if you don't do it then your changes will never get into QEMU. It's also +just polite -- it is quite disheartening for a developer to spend time +reviewing your code and suggesting improvements, only to find that +you're not going to do anything further and it was all wasted effort. + +When replying to comments on your patches **reply to all and not just +the sender** -- keeping discussion on the mailing list means everybody +can follow it. + +.. _pay_attention_to_review_comments: + +Pay attention to review comments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Someone took their time to review your work, and it pays to respect that +effort; repeatedly submitting a series without addressing all comments +from the previous round tends to alienate reviewers and stall your +patch. Reviewers aren't always perfect, so it is okay if you want to +argue that your code was correct in the first place instead of blindly +doing everything the reviewer asked. On the other hand, if someone +pointed out a potential issue during review, then even if your code +turns out to be correct, it's probably a sign that you should improve +your commit message and/or comments in the code explaining why the code +is correct. + +If you fix issues that are raised during review **resend the entire +patch series** not just the one patch that was changed. This allows +maintainers to easily apply the fixed series without having to manually +identify which patches are relevant. Send the new version as a complete +fresh email or series of emails -- don't try to make it a followup to +version 1. (This helps automatic patch email handling tools distinguish +between v1 and v2 emails.) + +.. _when_resending_patches_add_a_version_tag: + +When resending patches add a version tag +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All patches beyond the first version should include a version tag -- for +example, "[PATCH v2]". This means people can easily identify whether +they're looking at the most recent version. (The first version of a +patch need not say "v1", just [PATCH] is sufficient.) For patch series, +the version applies to the whole series -- even if you only change one +patch, you resend the entire series and mark it as "v2". Don't try to +track versions of different patches in the series separately. `git +format-patch `__ and `git +send-email `__ both understand +the ``-v2`` option to make this easier. Send each new revision as a new +top-level thread, rather than burying it in-reply-to an earlier +revision, as many reviewers are not looking inside deep threads for new +patches. + +.. _include_version_history_in_patchset_revisions: + +Include version history in patchset revisions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For later versions of patches, include a summary of changes from +previous versions, but not in the commit message itself. In an email +formatted as a git patch, the commit message is the part above the "---" +line, and this will go into the git changelog when the patch is +committed. This part should be a self-contained description of what this +version of the patch does, written to make sense to anybody who comes +back to look at this commit in git in six months' time. The part below +the "---" line and above the patch proper (git format-patch puts the +diffstat here) is a good place to put remarks for people reading the +patch email, and this is where the "changes since previous version" +summary belongs. The +`git-publish `__ script can +help with tracking a good summary across versions. Also, the +`git-backport-diff `__ script +can help focus reviewers on what changed between revisions. + +.. _tips_and_tricks: + +Tips and Tricks +--------------- + +.. _proper_use_of_reviewed_by_tags_can_aid_review: + +Proper use of Reviewed-by: tags can aid review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When reviewing a large series, a reviewer can reply to some of the +patches with a Reviewed-by tag, stating that they are happy with that +patch in isolation (sometimes conditional on minor cleanup, like fixing +whitespace, that doesn't affect code content). You should then update +those commit messages by hand to include the Reviewed-by tag, so that in +the next revision, reviewers can spot which patches were already clean +from the previous round. Conversely, if you significantly modify a patch +that was previously reviewed, remove the reviewed-by tag out of the +commit message, as well as listing the changes from the previous +version, to make it easier to focus a reviewer's attention to your +changes. + +.. _if_your_patch_seems_to_have_been_ignored: + +If your patch seems to have been ignored +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your patchset has received no replies you should "ping" it after a +week or two, by sending an email as a reply-to-all to the patch mail, +including the word "ping" and ideally also a link to the page for the +patch on +`patchwork `__ or +GMANE. It's worth double-checking for reasons why your patch might have +been ignored (forgot to CC the maintainer? annoyed people by failing to +respond to review comments on an earlier version?), but often for +less-maintained areas of QEMU patches do just slip through the cracks. +If your ping is also ignored, ping again after another week or so. As +the submitter, you are the person with the most motivation to get your +patch applied, so you have to be persistent. + +.. _is_my_patch_in: + +Is my patch in? +~~~~~~~~~~~~~~~ + +Once your patch has had enough review on list, the maintainer for that +area of code will send notification to the list that they are including +your patch in a particular staging branch. Periodically, the maintainer +then sends a `pull request +`__ +for aggregating topic branches into mainline qemu. Generally, you do not +need to send a pull request unless you have contributed enough patches +to become a maintainer over a particular section of code. Maintainers +may further modify your commit, by resolving simple merge conflicts or +fixing minor typos pointed out during review, but will always add a +Signed-off-by line in addition to yours, indicating that it went through +their tree. Occasionally, the maintainer's pull request may hit more +difficult merge conflicts, where you may be requested to help rebase and +resolve the problems. It may take a couple of weeks between when your +patch first had a positive review to when it finally lands in qemu.git; +release cycle freezes may extend that time even longer. + +.. _return_the_favor: + +Return the favor +~~~~~~~~~~~~~~~~ + +Peer review only works if everyone chips in a bit of review time. If +everyone submitted more patches than they reviewed, we would have a +patch backlog. A good goal is to try to review at least as many patches +from others as what you submit. Don't worry if you don't know the code +base as well as a maintainer; it's perfectly fine to admit when your +review is weak because you are unfamiliar with the code. From edcc4e4090ac56ea0d85ec482dd77bd7cc009b70 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Sat, 30 Oct 2021 11:06:06 +0800 Subject: [PATCH 60/76] target/riscv: machine: Sort the .subsections Move the codes around so that the order of .subsections matches the one they are referenced in vmstate_riscv_cpu. Signed-off-by: Bin Meng Reviewed-by: Alistair Francis Message-id: 20211030030606.32297-1-bmeng.cn@gmail.com Signed-off-by: Alistair Francis --- target/riscv/machine.c | 100 ++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/target/riscv/machine.c b/target/riscv/machine.c index 7b4c739564..ad8248ebfd 100644 --- a/target/riscv/machine.c +++ b/target/riscv/machine.c @@ -76,56 +76,6 @@ static bool hyper_needed(void *opaque) return riscv_has_ext(env, RVH); } -static bool vector_needed(void *opaque) -{ - RISCVCPU *cpu = opaque; - CPURISCVState *env = &cpu->env; - - return riscv_has_ext(env, RVV); -} - -static bool pointermasking_needed(void *opaque) -{ - RISCVCPU *cpu = opaque; - CPURISCVState *env = &cpu->env; - - return riscv_has_ext(env, RVJ); -} - -static const VMStateDescription vmstate_vector = { - .name = "cpu/vector", - .version_id = 1, - .minimum_version_id = 1, - .needed = vector_needed, - .fields = (VMStateField[]) { - VMSTATE_UINT64_ARRAY(env.vreg, RISCVCPU, 32 * RV_VLEN_MAX / 64), - VMSTATE_UINTTL(env.vxrm, RISCVCPU), - VMSTATE_UINTTL(env.vxsat, RISCVCPU), - VMSTATE_UINTTL(env.vl, RISCVCPU), - VMSTATE_UINTTL(env.vstart, RISCVCPU), - VMSTATE_UINTTL(env.vtype, RISCVCPU), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription vmstate_pointermasking = { - .name = "cpu/pointer_masking", - .version_id = 1, - .minimum_version_id = 1, - .needed = pointermasking_needed, - .fields = (VMStateField[]) { - VMSTATE_UINTTL(env.mmte, RISCVCPU), - VMSTATE_UINTTL(env.mpmmask, RISCVCPU), - VMSTATE_UINTTL(env.mpmbase, RISCVCPU), - VMSTATE_UINTTL(env.spmmask, RISCVCPU), - VMSTATE_UINTTL(env.spmbase, RISCVCPU), - VMSTATE_UINTTL(env.upmmask, RISCVCPU), - VMSTATE_UINTTL(env.upmbase, RISCVCPU), - - VMSTATE_END_OF_LIST() - } -}; - static const VMStateDescription vmstate_hyper = { .name = "cpu/hyper", .version_id = 1, @@ -164,6 +114,56 @@ static const VMStateDescription vmstate_hyper = { } }; +static bool vector_needed(void *opaque) +{ + RISCVCPU *cpu = opaque; + CPURISCVState *env = &cpu->env; + + return riscv_has_ext(env, RVV); +} + +static const VMStateDescription vmstate_vector = { + .name = "cpu/vector", + .version_id = 1, + .minimum_version_id = 1, + .needed = vector_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.vreg, RISCVCPU, 32 * RV_VLEN_MAX / 64), + VMSTATE_UINTTL(env.vxrm, RISCVCPU), + VMSTATE_UINTTL(env.vxsat, RISCVCPU), + VMSTATE_UINTTL(env.vl, RISCVCPU), + VMSTATE_UINTTL(env.vstart, RISCVCPU), + VMSTATE_UINTTL(env.vtype, RISCVCPU), + VMSTATE_END_OF_LIST() + } +}; + +static bool pointermasking_needed(void *opaque) +{ + RISCVCPU *cpu = opaque; + CPURISCVState *env = &cpu->env; + + return riscv_has_ext(env, RVJ); +} + +static const VMStateDescription vmstate_pointermasking = { + .name = "cpu/pointer_masking", + .version_id = 1, + .minimum_version_id = 1, + .needed = pointermasking_needed, + .fields = (VMStateField[]) { + VMSTATE_UINTTL(env.mmte, RISCVCPU), + VMSTATE_UINTTL(env.mpmmask, RISCVCPU), + VMSTATE_UINTTL(env.mpmbase, RISCVCPU), + VMSTATE_UINTTL(env.spmmask, RISCVCPU), + VMSTATE_UINTTL(env.spmbase, RISCVCPU), + VMSTATE_UINTTL(env.upmmask, RISCVCPU), + VMSTATE_UINTTL(env.upmbase, RISCVCPU), + + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_riscv_cpu = { .name = "cpu", .version_id = 3, From c94c239496256f1f1cb589825d052c2f3e26ebf6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 16 Nov 2021 10:50:42 +0100 Subject: [PATCH 61/76] meson.build: Merge riscv32 and riscv64 cpu family MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In ba0e73336200, we merged riscv32 and riscv64 in configure. However, meson does not treat them the same. We need to merge them here as well. Fixes: ba0e73336200 Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Alistair Francis Message-id: 20211116095042.335224-1-richard.henderson@linaro.org Signed-off-by: Alistair Francis --- meson.build | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/meson.build b/meson.build index 36540e0794..e2d38a43e6 100644 --- a/meson.build +++ b/meson.build @@ -59,6 +59,12 @@ supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64', 'arm', 'aarch64', 'mips', 'mips64', 'sparc', 'sparc64'] cpu = host_machine.cpu_family() + +# Unify riscv* to a single family. +if cpu in ['riscv32', 'riscv64'] + cpu = 'riscv' +endif + targetos = host_machine.system() if cpu in ['x86', 'x86_64'] From 418ce0201ff6f4c9d9560eaea15ff393c2cd7412 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 12 Nov 2021 08:22:20 +0100 Subject: [PATCH 62/76] Revert "device-crash-test: Ignore errors about a bus not being available" This reverts commit ca89d15f8e42f2e5eac5bd200af38fdbfb32e875. There is already an entry for this kind of messages earlier in the ERROR_RULE_LIST - when I added this patch, I just got fooled by the other errors that occur due to a race between QMP connection and QEMU terminating early (which still spit out the 'No bus found' messages in their backtrace), but these other problems have now fortunately been tackled by John Snow, so we certainly don't need this duplicated entry here anymore. Message-Id: <20211112072220.108580-1-thuth@redhat.com> Signed-off-by: Thomas Huth --- scripts/device-crash-test | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/device-crash-test b/scripts/device-crash-test index 3db0ffe5b8..1c73dac93e 100755 --- a/scripts/device-crash-test +++ b/scripts/device-crash-test @@ -176,7 +176,6 @@ ERROR_RULE_LIST = [ {'log':r"Multiple VT220 operator consoles are not supported"}, {'log':r"core 0 already populated"}, {'log':r"could not find stage1 bootloader"}, - {'log':r"No '.*' bus found for device"}, # other exitcode=1 failures not listed above will just generate INFO messages: {'exitcode':1, 'loglevel':logging.INFO}, From d06f3bf922679d89815fde9eac9264ba44e37954 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 16 Nov 2021 17:33:09 +0100 Subject: [PATCH 63/76] gitlab-ci/cirrus: Increase timeout to 80 minutes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The jobs on Cirrus-CI sometimes get delayed quite a bit, waiting to be scheduled, so while the build test itself finishes within 60 minutes, the total run time of the jobs can be longer due to this waiting time. Thus let's increase the timeout on the gitlab side a little bit, so that these jobs are not marked as failing just because of the delay. Message-Id: <20211116163309.246602-1-thuth@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Daniel P. Berrangé Reviewed-by: Willian Rampazzo Signed-off-by: Thomas Huth --- .gitlab-ci.d/cirrus.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml index cc2f2e8906..d273a9e713 100644 --- a/.gitlab-ci.d/cirrus.yml +++ b/.gitlab-ci.d/cirrus.yml @@ -14,6 +14,7 @@ stage: build image: registry.gitlab.com/libvirt/libvirt-ci/cirrus-run:master needs: [] + timeout: 80m allow_failure: true script: - source .gitlab-ci.d/cirrus/$NAME.vars From f3bc3a73c908df15966e66f88d5a633bd42fd029 Mon Sep 17 00:00:00 2001 From: Peng Liang Date: Wed, 17 Nov 2021 09:47:39 +0800 Subject: [PATCH 64/76] vfio: Fix memory leak of hostwin hostwin is allocated and added to hostwin_list in vfio_host_win_add, but it is only deleted from hostwin_list in vfio_host_win_del, which causes a memory leak. Also, freeing all elements in hostwin_list is missing in vfio_disconnect_container. Fix: 2e4109de8e58 ("vfio/spapr: Create DMA window dynamically (SPAPR IOMMU v2)") CC: qemu-stable@nongnu.org Signed-off-by: Peng Liang Link: https://lore.kernel.org/r/20211117014739.1839263-1-liangpeng10@huawei.com Signed-off-by: Alex Williamson --- hw/vfio/common.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index dd387b0d39..080046e3f5 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -551,6 +551,7 @@ static int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); return 0; } } @@ -2239,6 +2240,7 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; VFIOGuestIOMMU *giommu, *tmp; + VFIOHostDMAWindow *hostwin, *next; QLIST_REMOVE(container, next); @@ -2249,6 +2251,12 @@ static void vfio_disconnect_container(VFIOGroup *group) g_free(giommu); } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + } + trace_vfio_disconnect_container(container->fd); close(container->fd); g_free(container); From 55cdf566412695b4fc052065c7970632129cd65b Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 11 Nov 2021 10:00:43 +0000 Subject: [PATCH 65/76] qapi/qom,target/i386: sev-guest: Introduce kernel-hashes=on|off option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce new boolean 'kernel-hashes' option on the sev-guest object. It will be used to to decide whether to add the hashes of kernel/initrd/cmdline to SEV guest memory when booting with -kernel. The default value is 'off'. Signed-off-by: Dov Murik Acked-by: Brijesh Singh Signed-off-by: Daniel P. Berrangé --- qapi/qom.json | 7 ++++++- qemu-options.hx | 6 +++++- target/i386/sev.c | 20 ++++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/qapi/qom.json b/qapi/qom.json index ccd1167808..eeb5395ff3 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -769,6 +769,10 @@ # @reduced-phys-bits: number of bits in physical addresses that become # unavailable when SEV is enabled # +# @kernel-hashes: if true, add hashes of kernel/initrd/cmdline to a +# designated guest firmware page for measured boot +# with -kernel (default: false) (since 6.2) +# # Since: 2.12 ## { 'struct': 'SevGuestProperties', @@ -778,7 +782,8 @@ '*policy': 'uint32', '*handle': 'uint32', '*cbitpos': 'uint32', - 'reduced-phys-bits': 'uint32' } } + 'reduced-phys-bits': 'uint32', + '*kernel-hashes': 'bool' } } ## # @ObjectType: diff --git a/qemu-options.hx b/qemu-options.hx index 7749f59300..ae2c6dbbfc 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -5189,7 +5189,7 @@ SRST -object secret,id=sec0,keyid=secmaster0,format=base64,\\ data=$SECRET,iv=$(sev_device = g_strdup(value); } +static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp) +{ + SevGuestState *sev = SEV_GUEST(obj); + + return sev->kernel_hashes; +} + +static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) +{ + SevGuestState *sev = SEV_GUEST(obj); + + sev->kernel_hashes = value; +} + static void sev_guest_class_init(ObjectClass *oc, void *data) { @@ -345,6 +360,11 @@ sev_guest_class_init(ObjectClass *oc, void *data) sev_guest_set_session_file); object_class_property_set_description(oc, "session-file", "guest owners session parameters (encoded with base64)"); + object_class_property_add_bool(oc, "kernel-hashes", + sev_guest_get_kernel_hashes, + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); } static void From 9dbe0c93f00d3aef9ac386c390595d370cfad677 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 11 Nov 2021 10:00:44 +0000 Subject: [PATCH 66/76] target/i386/sev: Add kernel hashes only if sev-guest.kernel-hashes=on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit cff03145ed3c ("sev/i386: Introduce sev_add_kernel_loader_hashes for measured linux boot", 2021-09-30) introduced measured direct boot with -kernel, using an OVMF-designated hashes table which QEMU fills. However, if OVMF doesn't designate such an area, QEMU would completely abort the VM launch. This breaks launching with -kernel using older OVMF images which don't publish the SEV_HASH_TABLE_RV_GUID. Fix that so QEMU will only look for the hashes table if the sev-guest kernel-hashes option is set to on. Otherwise, QEMU won't look for the designated area in OVMF and won't fill that area. To enable addition of kernel hashes, launch the guest with: -object sev-guest,...,kernel-hashes=on Signed-off-by: Dov Murik Reported-by: Tom Lendacky Acked-by: Brijesh Singh Signed-off-by: Daniel P. Berrangé --- target/i386/sev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index cad32812f5..e3abbeef68 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1223,6 +1223,14 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) size_t hash_len = HASH_SIZE; int aligned_len; + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ + if (!sev_guest->kernel_hashes) { + return false; + } + if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { error_setg(errp, "SEV: kernel specified but OVMF has no hash table guid"); return false; From 5a0294a21c7677498bf40a447cc4a417d51a3cf4 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 11 Nov 2021 10:00:45 +0000 Subject: [PATCH 67/76] target/i386/sev: Rephrase error message when no hashes table in guest firmware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Dov Murik Acked-by: Brijesh Singh Signed-off-by: Daniel P. Berrangé --- target/i386/sev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index e3abbeef68..6ff196f7ad 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1232,7 +1232,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) } if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { - error_setg(errp, "SEV: kernel specified but OVMF has no hash table guid"); + error_setg(errp, "SEV: kernel specified but guest firmware " + "has no hashes table GUID"); return false; } area = (SevHashTableDescriptor *)data; From a0190bf15044d2410e84f9a12aeac9ed56bd58b0 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 11 Nov 2021 10:00:46 +0000 Subject: [PATCH 68/76] target/i386/sev: Fail when invalid hashes table area detected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit cff03145ed3c ("sev/i386: Introduce sev_add_kernel_loader_hashes for measured linux boot", 2021-09-30) introduced measured direct boot with -kernel, using an OVMF-designated hashes table which QEMU fills. However, no checks are performed on the validity of the hashes area designated by OVMF. Specifically, if OVMF publishes the SEV_HASH_TABLE_RV_GUID entry but it is filled with zeroes, this will cause QEMU to write the hashes entries over the first page of the guest's memory (GPA 0). Add validity checks to the published area. If the hashes table area's base address is zero, or its size is too small to fit the aligned hashes table, display an error and stop the guest launch. In such case, the following error will be displayed: qemu-system-x86_64: SEV: guest firmware hashes table area is invalid (base=0x0 size=0x0) Signed-off-by: Dov Murik Reported-by: Brijesh Singh Acked-by: Brijesh Singh Signed-off-by: Daniel P. Berrangé --- target/i386/sev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 6ff196f7ad..d11b512361 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1221,7 +1221,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) uint8_t kernel_hash[HASH_SIZE]; uint8_t *hashp; size_t hash_len = HASH_SIZE; - int aligned_len; + int aligned_len = ROUND_UP(sizeof(SevHashTable), 16); /* * Only add the kernel hashes if the sev-guest configuration explicitly @@ -1237,6 +1237,11 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) return false; } area = (SevHashTableDescriptor *)data; + if (!area->base || area->size < aligned_len) { + error_setg(errp, "SEV: guest firmware hashes table area is invalid " + "(base=0x%x size=0x%x)", area->base, area->size); + return false; + } /* * Calculate hash of kernel command-line with the terminating null byte. If @@ -1295,7 +1300,6 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) memcpy(ht->kernel.hash, kernel_hash, sizeof(ht->kernel.hash)); /* When calling sev_encrypt_flash, the length has to be 16 byte aligned */ - aligned_len = ROUND_UP(ht->len, 16); if (aligned_len != ht->len) { /* zero the excess data so the measurement can be reliably calculated */ memset(ht->padding, 0, aligned_len - ht->len); From ddcc0d898e4040b1795254fac8ac4f8514b0ecb8 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 11 Nov 2021 10:00:47 +0000 Subject: [PATCH 69/76] target/i386/sev: Perform padding calculations at compile-time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In sev_add_kernel_loader_hashes, the sizes of structs are known at compile-time, so calculate needed padding at compile-time. No functional change intended. Signed-off-by: Dov Murik Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Philippe Mathieu-Daudé Acked-by: Brijesh Singh Signed-off-by: Daniel P. Berrangé --- target/i386/sev.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index d11b512361..4fd258a570 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -110,9 +110,19 @@ typedef struct QEMU_PACKED SevHashTable { SevHashTableEntry cmdline; SevHashTableEntry initrd; SevHashTableEntry kernel; - uint8_t padding[]; } SevHashTable; +/* + * Data encrypted by sev_encrypt_flash() must be padded to a multiple of + * 16 bytes. + */ +typedef struct QEMU_PACKED PaddedSevHashTable { + SevHashTable ht; + uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; +} PaddedSevHashTable; + +QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + static SevGuestState *sev_guest; static Error *sev_mig_blocker; @@ -1216,12 +1226,12 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) uint8_t *data; SevHashTableDescriptor *area; SevHashTable *ht; + PaddedSevHashTable *padded_ht; uint8_t cmdline_hash[HASH_SIZE]; uint8_t initrd_hash[HASH_SIZE]; uint8_t kernel_hash[HASH_SIZE]; uint8_t *hashp; size_t hash_len = HASH_SIZE; - int aligned_len = ROUND_UP(sizeof(SevHashTable), 16); /* * Only add the kernel hashes if the sev-guest configuration explicitly @@ -1237,7 +1247,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) return false; } area = (SevHashTableDescriptor *)data; - if (!area->base || area->size < aligned_len) { + if (!area->base || area->size < sizeof(PaddedSevHashTable)) { error_setg(errp, "SEV: guest firmware hashes table area is invalid " "(base=0x%x size=0x%x)", area->base, area->size); return false; @@ -1282,7 +1292,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) * Populate the hashes table in the guest's memory at the OVMF-designated * area for the SEV hashes table */ - ht = qemu_map_ram_ptr(NULL, area->base); + padded_ht = qemu_map_ram_ptr(NULL, area->base); + ht = &padded_ht->ht; ht->guid = sev_hash_table_header_guid; ht->len = sizeof(*ht); @@ -1299,13 +1310,10 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) ht->kernel.len = sizeof(ht->kernel); memcpy(ht->kernel.hash, kernel_hash, sizeof(ht->kernel.hash)); - /* When calling sev_encrypt_flash, the length has to be 16 byte aligned */ - if (aligned_len != ht->len) { - /* zero the excess data so the measurement can be reliably calculated */ - memset(ht->padding, 0, aligned_len - ht->len); - } + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); - if (sev_encrypt_flash((uint8_t *)ht, aligned_len, errp) < 0) { + if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { return false; } From 58603ba2680fa35eade630e4b040e96953a11021 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 11 Nov 2021 10:00:48 +0000 Subject: [PATCH 70/76] target/i386/sev: Replace qemu_map_ram_ptr with address_space_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use address_space_map/unmap and check for errors. Signed-off-by: Dov Murik Acked-by: Brijesh Singh [Two lines wrapped for length - Daniel] Signed-off-by: Daniel P. Berrangé --- target/i386/sev.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 4fd258a570..025ff7a6f8 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -37,6 +37,7 @@ #include "qapi/qmp/qerror.h" #include "exec/confidential-guest-support.h" #include "hw/i386/pc.h" +#include "exec/address-spaces.h" #define TYPE_SEV_GUEST "sev-guest" OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) @@ -1232,6 +1233,9 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) uint8_t kernel_hash[HASH_SIZE]; uint8_t *hashp; size_t hash_len = HASH_SIZE; + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; /* * Only add the kernel hashes if the sev-guest configuration explicitly @@ -1292,7 +1296,12 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) * Populate the hashes table in the guest's memory at the OVMF-designated * area for the SEV hashes table */ - padded_ht = qemu_map_ram_ptr(NULL, area->base); + padded_ht = address_space_map(&address_space_memory, area->base, + &mapped_len, true, attrs); + if (!padded_ht || mapped_len != sizeof(*padded_ht)) { + error_setg(errp, "SEV: cannot map hashes table guest memory area"); + return false; + } ht = &padded_ht->ht; ht->guid = sev_hash_table_header_guid; @@ -1314,10 +1323,13 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { - return false; + ret = false; } - return true; + address_space_unmap(&address_space_memory, padded_ht, + mapped_len, true, mapped_len); + + return ret; } static void From d05dcd94aee88728facafb993c7280547eb4d645 Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Sat, 30 Jan 2021 18:46:52 +0530 Subject: [PATCH 71/76] net: vmxnet3: validate configuration values during activate (CVE-2021-20203) While activating device in vmxnet3_acticate_device(), it does not validate guest supplied configuration values against predefined minimum - maximum limits. This may lead to integer overflow or OOB access issues. Add checks to avoid it. Fixes: CVE-2021-20203 Buglink: https://bugs.launchpad.net/qemu/+bug/1913873 Reported-by: Gaoning Pan Signed-off-by: Prasad J Pandit Signed-off-by: Jason Wang --- hw/net/vmxnet3.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 41f796a247..f65af4e9ef 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -1441,6 +1441,7 @@ static void vmxnet3_activate_device(VMXNET3State *s) vmxnet3_setup_rx_filtering(s); /* Cache fields from shared memory */ s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu); + assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU); VMW_CFPRN("MTU is %u", s->mtu); s->max_rx_frags = @@ -1486,6 +1487,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* Read rings memory locations for TX queues */ pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA); size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize); + if (size > VMXNET3_TX_RING_MAX_SIZE) { + size = VMXNET3_TX_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size, sizeof(struct Vmxnet3_TxDesc), false); @@ -1496,6 +1500,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* TXC ring */ pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA); size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize); + if (size > VMXNET3_TC_RING_MAX_SIZE) { + size = VMXNET3_TC_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size, sizeof(struct Vmxnet3_TxCompDesc), true); VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring); @@ -1537,6 +1544,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* RX rings */ pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]); size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]); + if (size > VMXNET3_RX_RING_MAX_SIZE) { + size = VMXNET3_RX_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size, sizeof(struct Vmxnet3_RxDesc), false); VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d", @@ -1546,6 +1556,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* RXC ring */ pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA); size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize); + if (size > VMXNET3_RC_RING_MAX_SIZE) { + size = VMXNET3_RC_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size, sizeof(struct Vmxnet3_RxCompDesc), true); VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size); From fb5eca4a571e303aafac7130abd66adc184aae72 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Thu, 18 Nov 2021 11:20:10 +0800 Subject: [PATCH 72/76] net/colo-compare.c: Fix ACK track reverse issue The TCP protocol ACK maybe bigger than uint32_t MAX. At this time, the ACK will reverse to 0. This patch fix the max_ack and min_ack track issue. Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index b8876d7fd9..1225f40e41 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -209,7 +209,8 @@ static void fill_pkt_tcp_info(void *data, uint32_t *max_ack) pkt->tcp_seq = ntohl(tcphd->th_seq); pkt->tcp_ack = ntohl(tcphd->th_ack); - *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack; + /* Need to consider ACK will bigger than uint32_t MAX */ + *max_ack = pkt->tcp_ack - *max_ack > 0 ? pkt->tcp_ack : *max_ack; pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data + (tcphd->th_off << 2); pkt->payload_size = pkt->size - pkt->header_size; @@ -413,7 +414,8 @@ static void colo_compare_tcp(CompareState *s, Connection *conn) * can ensure that the packet's payload is acknowledged by * primary and secondary. */ - uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack; + uint32_t min_ack = conn->pack - conn->sack > 0 ? + conn->sack : conn->pack; pri: if (g_queue_is_empty(&conn->primary_list)) { From 0656fbc7ddccdade1709742a9b56ae07dd3c280a Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Thu, 18 Nov 2021 11:20:11 +0800 Subject: [PATCH 73/76] net/colo-compare.c: Fix incorrect return when input wrong size Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index 1225f40e41..b966e7e514 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -807,7 +807,7 @@ static int compare_chr_send(CompareState *s, } if (!size) { - return 0; + return -1; } entry = g_slice_new(SendEntry); From 9fc6e86e8b69e2e672cbb9c25cddc3b9deb96afb Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 9 Sep 2021 11:43:08 +0200 Subject: [PATCH 74/76] hw/nvme: reattach subsystem namespaces on hotplug With commit 5ffbaeed16 ("hw/nvme: fix controller hot unplugging") namespaces get moved from the controller to the subsystem if one is specified. That keeps the namespaces alive after a controller hot-unplug, but after a controller hotplug we have to reconnect the namespaces from the subsystem to the controller. Fixes: 5ffbaeed16 ("hw/nvme: fix controller hot unplugging") Cc: Klaus Jensen Reviewed-by: Keith Busch Signed-off-by: Hannes Reinecke [k.jensen: only attach to shared and non-detached namespaces] Signed-off-by: Klaus Jensen --- hw/nvme/subsys.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index 495dcff5eb..fb58d63950 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -14,7 +14,7 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) { NvmeSubsystem *subsys = n->subsys; - int cntlid; + int cntlid, nsid; for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) { if (!subsys->ctrls[cntlid]) { @@ -29,12 +29,20 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) subsys->ctrls[cntlid] = n; + for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) { + NvmeNamespace *ns = subsys->namespaces[nsid]; + if (ns && ns->params.shared && !ns->params.detached) { + nvme_attach_ns(n, ns); + } + } + return cntlid; } void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) { subsys->ctrls[n->cntlid] = NULL; + n->cntlid = -1; } static void nvme_subsys_setup(NvmeSubsystem *subsys) From 916b0f0b5264759c581badfdc0e1c5f98362dbda Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 24 Sep 2021 08:52:22 +0200 Subject: [PATCH 75/76] hw/nvme: change nvme-ns 'shared' default Change namespaces to be shared namespaces by default (parameter shared=on). Keep shared=off for older machine types. Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- docs/system/devices/nvme.rst | 24 ++++++++++++++---------- hw/core/machine.c | 1 + hw/nvme/ns.c | 8 +------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst index bff72d1c24..a1c0db01f6 100644 --- a/docs/system/devices/nvme.rst +++ b/docs/system/devices/nvme.rst @@ -110,28 +110,32 @@ multipath I/O. This will create an NVM subsystem with two controllers. Having controllers linked to an ``nvme-subsys`` device allows additional ``nvme-ns`` parameters: -``shared`` (default: ``off``) +``shared`` (default: ``on`` since 6.2) Specifies that the namespace will be attached to all controllers in the - subsystem. If set to ``off`` (the default), the namespace will remain a - private namespace and may only be attached to a single controller at a time. + subsystem. If set to ``off``, the namespace will remain a private namespace + and may only be attached to a single controller at a time. Shared namespaces + are always automatically attached to all controllers (also when controllers + are hotplugged). ``detached`` (default: ``off``) If set to ``on``, the namespace will be be available in the subsystem, but - not attached to any controllers initially. + not attached to any controllers initially. A shared namespace with this set + to ``on`` will never be automatically attached to controllers. Thus, adding .. code-block:: console -drive file=nvm-1.img,if=none,id=nvm-1 - -device nvme-ns,drive=nvm-1,nsid=1,shared=on + -device nvme-ns,drive=nvm-1,nsid=1 -drive file=nvm-2.img,if=none,id=nvm-2 - -device nvme-ns,drive=nvm-2,nsid=3,detached=on + -device nvme-ns,drive=nvm-2,nsid=3,shared=off,detached=on -will cause NSID 1 will be a shared namespace (due to ``shared=on``) that is -initially attached to both controllers. NSID 3 will be a private namespace -(i.e. only attachable to a single controller at a time) and will not be -attached to any controller initially (due to ``detached=on``). +will cause NSID 1 will be a shared namespace that is initially attached to both +controllers. NSID 3 will be a private namespace due to ``shared=off`` and only +attachable to a single controller at a time. Additionally it will not be +attached to any controller initially (due to ``detached=on``) or to hotplugged +controllers. Optional Features ================= diff --git a/hw/core/machine.c b/hw/core/machine.c index 26ec54e726..53a99abc56 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -39,6 +39,7 @@ GlobalProperty hw_compat_6_1[] = { { "vhost-user-vsock-device", "seqpacket", "off" }, + { "nvme-ns", "shared", "off" }, }; const size_t hw_compat_6_1_len = G_N_ELEMENTS(hw_compat_6_1); diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index b7cf1494e7..8b5f98c761 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -465,12 +465,6 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) "linked to an nvme-subsys device"); return; } - - if (ns->params.shared) { - error_setg(errp, "shared requires that the nvme device is " - "linked to an nvme-subsys device"); - return; - } } else { /* * If this namespace belongs to a subsystem (through a link on the @@ -532,7 +526,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) static Property nvme_ns_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf), DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), - DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, false), + DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true), DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0), From e2c57529c9306e4c9aac75d9879f6e7699584a22 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Wed, 17 Nov 2021 14:12:56 +0100 Subject: [PATCH 76/76] hw/nvme: fix buffer overrun in nvme_changed_nslist (CVE-2021-3947) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix missing offset verification. Cc: qemu-stable@nongnu.org Cc: Philippe Mathieu-Daudé Reported-by: Qiuhao Li Fixes: f432fdfa121 ("support changed namespace asynchronous event") Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 6a571d18cf..5f573c417b 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4168,6 +4168,11 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, int i = 0; uint32_t nsid; + if (off >= sizeof(nslist)) { + trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(nslist)); + return NVME_INVALID_FIELD | NVME_DNR; + } + memset(nslist, 0x0, sizeof(nslist)); trans_len = MIN(sizeof(nslist) - off, buf_len);