From cd523a4181b152004fc0aed18381aefe600ac38e Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 2 Nov 2021 16:51:57 +0100
Subject: [PATCH 01/76] net/vhost-vdpa: fix memory leak in
 vhost_vdpa_get_max_queue_pairs()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use g_autofree to ensure that `config` is freed when
vhost_vdpa_get_max_queue_pairs() returns.

Reported-by: Coverity (CID 1465228: RESOURCE_LEAK)
Fixes: 402378407d ("vhost-vdpa: multiqueue support")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-Id: <20211102155157.241034-1-sgarzare@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 net/vhost-vdpa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 49ab322511..373b706b90 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -214,7 +214,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
 {
     unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
-    struct vhost_vdpa_config *config;
+    g_autofree struct vhost_vdpa_config *config = NULL;
     __virtio16 *max_queue_pairs;
     uint64_t features;
     int ret;

From b66cecb238d065628b85a18357d28d21618a4580 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 2 Nov 2021 16:33:42 +0000
Subject: [PATCH 02/76] softmmu/qdev-monitor: fix use-after-free in
 qdev_set_id()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported by Coverity (CID 1465222).

Fixes: 4a1d937796de0fecd8b22d7dbebf87f38e8282fd ("softmmu/qdev-monitor: add error handling in qdev_set_id")
Cc: Damien Hedde <damien.hedde@greensocs.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20211102163342.31162-1-stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Damien Hedde <damien.hedde@greensocs.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
---
 softmmu/qdev-monitor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index f8b3a4cd82..588a62b88d 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -593,8 +593,8 @@ const char *qdev_set_id(DeviceState *dev, char *id, Error **errp)
         if (prop) {
             dev->id = id;
         } else {
-            g_free(id);
             error_setg(errp, "Duplicate device ID '%s'", id);
+            g_free(id);
             return NULL;
         }
     } else {

From 245cf2c24eb0d5a4fe75151e1794aa5cd53b130d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Thu, 4 Nov 2021 09:56:24 +0100
Subject: [PATCH 03/76] vhost: Rename last_index to vq_index_end
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The doc of this field pointed out that last_index is the last vq index.
This is misleading, since it's actually one past the end of the vqs.

Renaming and modifying comment.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20211104085625.2054959-2-eperezma@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/net/vhost_net.c        | 4 ++--
 hw/virtio/vhost-vdpa.c    | 2 +-
 include/hw/virtio/vhost.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 0d888f29a6..29f2c4212f 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -232,10 +232,10 @@ fail:
 }
 
 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
-                                   int last_index)
+                                   int vq_index_end)
 {
     net->dev.vq_index = vq_index;
-    net->dev.last_index = last_index;
+    net->dev.vq_index_end = vq_index_end;
 }
 
 static int vhost_net_start_one(struct vhost_net *net,
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 0d8051426c..bcaf00e09f 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -645,7 +645,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
         vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
     }
 
-    if (dev->vq_index + dev->nvqs != dev->last_index) {
+    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
         return 0;
     }
 
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 3fa0b554ef..58a73e7b7a 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -74,8 +74,8 @@ struct vhost_dev {
     unsigned int nvqs;
     /* the first virtqueue which would be used by this vhost dev */
     int vq_index;
-    /* the last vq index for the virtio device (not vhost) */
-    int last_index;
+    /* one past the last vq index for the virtio device (not vhost) */
+    int vq_index_end;
     /* if non-zero, minimum required value for max_queues */
     int num_queues;
     uint64_t features;

From 14c81b2191fffb61cd6d2a7a839d34ec2d5e09a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Thu, 4 Nov 2021 09:56:25 +0100
Subject: [PATCH 04/76] vhost: Fix last vq queue index of devices with no cvq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The -1 assumes that cvq device model is accounted in data_queue_pairs,
if cvq does not exists, but it's actually the opposite: Devices with
!cvq are ok but devices with cvq does not add the last queue to
data_queue_pairs.

This is not a problem to vhost-net, but it is to vhost-vdpa:
* Devices with cvq gets initialized at last data vq device model, not
at cvq one.
* Devices with !cvq never gets initialized, since last_index is the
first queue of the last device model.

Because of that, the right change in last_index is to actually add the
cvq, not to remove the missing one.

This is not a problem to vhost-net, but it is to vhost-vdpa, which
device model trust to reach the last index to finish starting the
device.

Also, as the previous commit, rename it to index_end.

Tested with vp_vdpa with host's vhost=on and vhost=off, with ctrl_vq=on
and ctrl_vq=off.

Fixes: 049eb15b5fc9 ("vhost: record the last virtqueue index for the virtio device")
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <20211104085625.2054959-3-eperezma@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/net/vhost_net.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 29f2c4212f..30379d2ca4 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -326,11 +326,11 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
     VirtIONet *n = VIRTIO_NET(dev);
     int nvhosts = data_queue_pairs + cvq;
     struct vhost_net *net;
-    int r, e, i, last_index = data_queue_pairs * 2;
+    int r, e, i, index_end = data_queue_pairs * 2;
     NetClientState *peer;
 
-    if (!cvq) {
-        last_index -= 1;
+    if (cvq) {
+        index_end += 1;
     }
 
     if (!k->set_guest_notifiers) {
@@ -347,7 +347,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
         }
 
         net = get_vhost_net(peer);
-        vhost_net_set_vq_index(net, i * 2, last_index);
+        vhost_net_set_vq_index(net, i * 2, index_end);
 
         /* Suppress the masking guest notifiers on vhost user
          * because vhost user doesn't interrupt masking/unmasking

From be81ba604260f4beae1522241c579eeda1b891fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <f4bug@amsat.org>
Date: Sat, 6 Nov 2021 15:50:16 +0100
Subject: [PATCH 05/76] hw/mem/pc-dimm: Restrict NUMA-specific code to NUMA
 machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When trying to use the pc-dimm device on a non-NUMA machine, we get:

  $ qemu-system-arm -M none -cpu max -S \
      -object memory-backend-file,id=mem1,size=1M,mem-path=/tmp/1m \
      -device pc-dimm,id=dimm1,memdev=mem1
  Segmentation fault (core dumped)

  (gdb) bt
  #0  pc_dimm_realize (dev=0x555556da3e90, errp=0x7fffffffcd10) at hw/mem/pc-dimm.c:184
  #1  0x0000555555fe1f8f in device_set_realized (obj=0x555556da3e90, value=true, errp=0x7fffffffce18) at hw/core/qdev.c:531
  #2  0x0000555555feb4a9 in property_set_bool (obj=0x555556da3e90, v=0x555556e54420, name=0x5555563c3c41 "realized", opaque=0x555556a704f0, errp=0x7fffffffce18) at qom/object.c:2257

To avoid that crash, restrict the pc-dimm NUMA check to machines
supporting NUMA, and do not allow the use of 'node' property on
non-NUMA machines.

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20211106145016.611332-1-f4bug@amsat.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/mem/pc-dimm.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index a3a2560301..48b913aba6 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -181,7 +181,21 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp)
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MachineState *ms = MACHINE(qdev_get_machine());
-    int nb_numa_nodes = ms->numa_state->num_nodes;
+
+    if (ms->numa_state) {
+        int nb_numa_nodes = ms->numa_state->num_nodes;
+
+        if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) ||
+            (!nb_numa_nodes && dimm->node)) {
+            error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
+                       PRIu32 "' which exceeds the number of numa nodes: %d",
+                       dimm->node, nb_numa_nodes ? nb_numa_nodes : 1);
+            return;
+        }
+    } else if (dimm->node > 0) {
+        error_setg(errp, "machine doesn't support NUMA");
+        return;
+    }
 
     if (!dimm->hostmem) {
         error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
@@ -191,13 +205,6 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp)
                    object_get_canonical_path_component(OBJECT(dimm->hostmem)));
         return;
     }
-    if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) ||
-        (!nb_numa_nodes && dimm->node)) {
-        error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
-                   PRIu32 "' which exceeds the number of numa nodes: %d",
-                   dimm->node, nb_numa_nodes ? nb_numa_nodes : 1);
-        return;
-    }
 
     if (ddc->realize) {
         ddc->realize(dimm, errp);

From 2aa1842d6d79dcd1b84c58eeb44591a99a9e56df Mon Sep 17 00:00:00 2001
From: Igor Mammedov <imammedo@redhat.com>
Date: Fri, 12 Nov 2021 06:08:53 -0500
Subject: [PATCH 06/76] pcie: rename 'native-hotplug' to 'x-native-hotplug'

Mark property as experimental/internal adding 'x-' prefix.

Property was introduced in 6.1 and it should have provided
ability to turn on native PCIE hotplug on port even when
ACPI PCI hotplug is in use is user explicitly sets property
on CLI. However that never worked since slot is wired to
ACPI hotplug controller.
Another non-intended usecase: disable native hotplug on slot
when APCI based hotplug is disabled, which works but slot has
'hotplug' property for this taks.

It should be relatively safe to rename it to experimental
as no users should exist for it and given that the property
is broken we don't really want to leave it around for much
longer lest users start using it.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Ani Sinha <ani@anisinha.ca>
Message-Id: <20211112110857.3116853-2-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/pc_q35.c   | 2 +-
 hw/pci/pcie_port.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 797e09500b..fc34b905ee 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -243,7 +243,7 @@ static void pc_q35_init(MachineState *machine)
                                           NULL);
 
     if (acpi_pcihp) {
-        object_register_sugar_prop(TYPE_PCIE_SLOT, "native-hotplug",
+        object_register_sugar_prop(TYPE_PCIE_SLOT, "x-native-hotplug",
                                    "false", true);
     }
 
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index da850e8dde..e95c1e5519 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -148,7 +148,7 @@ static Property pcie_slot_props[] = {
     DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0),
     DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0),
     DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true),
-    DEFINE_PROP_BOOL("native-hotplug", PCIESlot, native_hotplug, true),
+    DEFINE_PROP_BOOL("x-native-hotplug", PCIESlot, native_hotplug, true),
     DEFINE_PROP_END_OF_LIST()
 };
 

From c318bef76206c2ecb6016e8e68c4ac6ff9a4c8cb Mon Sep 17 00:00:00 2001
From: Julia Suvorova <jusual@redhat.com>
Date: Fri, 12 Nov 2021 06:08:54 -0500
Subject: [PATCH 07/76] hw/acpi/ich9: Add compat prop to keep HPC bit set for
 6.1 machine type

To solve issues [1-2] the Hot Plug Capable bit in PCIe Slots will be
turned on, while the switch to ACPI Hot-plug will be done in the
DSDT table.

Introducing 'x-keep-native-hpc' property disables the HPC bit only
in 6.1 and as a result keeps the forced 'reserve-io' on
pcie-root-ports in 6.1 too.

[1] https://gitlab.com/qemu-project/qemu/-/issues/641
[2] https://bugzilla.redhat.com/show_bug.cgi?id=2006409

Signed-off-by: Julia Suvorova <jusual@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20211112110857.3116853-3-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/acpi/ich9.c         | 18 ++++++++++++++++++
 hw/i386/pc.c           |  2 ++
 hw/i386/pc_q35.c       |  7 ++++++-
 include/hw/acpi/ich9.h |  1 +
 4 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 1ee2ba2c50..ebe08ed831 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -419,6 +419,20 @@ static void ich9_pm_set_acpi_pci_hotplug(Object *obj, bool value, Error **errp)
     s->pm.use_acpi_hotplug_bridge = value;
 }
 
+static bool ich9_pm_get_keep_pci_slot_hpc(Object *obj, Error **errp)
+{
+    ICH9LPCState *s = ICH9_LPC_DEVICE(obj);
+
+    return s->pm.keep_pci_slot_hpc;
+}
+
+static void ich9_pm_set_keep_pci_slot_hpc(Object *obj, bool value, Error **errp)
+{
+    ICH9LPCState *s = ICH9_LPC_DEVICE(obj);
+
+    s->pm.keep_pci_slot_hpc = value;
+}
+
 void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
 {
     static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN;
@@ -428,6 +442,7 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
     pm->disable_s4 = 0;
     pm->s4_val = 2;
     pm->use_acpi_hotplug_bridge = true;
+    pm->keep_pci_slot_hpc = true;
 
     object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE,
                                    &pm->pm_io_base, OBJ_PROP_FLAG_READ);
@@ -454,6 +469,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
     object_property_add_bool(obj, ACPI_PM_PROP_ACPI_PCIHP_BRIDGE,
                              ich9_pm_get_acpi_pci_hotplug,
                              ich9_pm_set_acpi_pci_hotplug);
+    object_property_add_bool(obj, "x-keep-pci-slot-hpc",
+                             ich9_pm_get_keep_pci_slot_hpc,
+                             ich9_pm_set_keep_pci_slot_hpc);
 }
 
 void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 2592a82148..a2ef40ecbc 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -98,6 +98,7 @@ GlobalProperty pc_compat_6_1[] = {
     { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" },
     { TYPE_X86_CPU, "hv-version-id-major", "0x0006" },
     { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" },
+    { "ICH9-LPC", "x-keep-pci-slot-hpc", "false" },
 };
 const size_t pc_compat_6_1_len = G_N_ELEMENTS(pc_compat_6_1);
 
@@ -107,6 +108,7 @@ GlobalProperty pc_compat_6_0[] = {
     { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" },
     { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" },
     { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" },
+    { "ICH9-LPC", "x-keep-pci-slot-hpc", "true" },
 };
 const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0);
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index fc34b905ee..e1e100316d 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -137,6 +137,7 @@ static void pc_q35_init(MachineState *machine)
     DriveInfo *hd[MAX_SATA_PORTS];
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     bool acpi_pcihp;
+    bool keep_pci_slot_hpc;
 
     /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
      * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
@@ -242,7 +243,11 @@ static void pc_q35_init(MachineState *machine)
                                           ACPI_PM_PROP_ACPI_PCIHP_BRIDGE,
                                           NULL);
 
-    if (acpi_pcihp) {
+    keep_pci_slot_hpc = object_property_get_bool(OBJECT(lpc),
+                                                 "x-keep-pci-slot-hpc",
+                                                 NULL);
+
+    if (!keep_pci_slot_hpc && acpi_pcihp) {
         object_register_sugar_prop(TYPE_PCIE_SLOT, "x-native-hotplug",
                                    "false", true);
     }
diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h
index f04f1791bd..7ca92843c6 100644
--- a/include/hw/acpi/ich9.h
+++ b/include/hw/acpi/ich9.h
@@ -56,6 +56,7 @@ typedef struct ICH9LPCPMRegs {
     AcpiCpuHotplug gpe_cpu;
     CPUHotplugState cpuhp_state;
 
+    bool keep_pci_slot_hpc;
     bool use_acpi_hotplug_bridge;
     AcpiPciHpState acpi_pci_hotplug;
     MemHotplugState acpi_memory_hotplug;

From be12e3a016f10f4daa1a248df97198f4b10ef8c4 Mon Sep 17 00:00:00 2001
From: Julia Suvorova <jusual@redhat.com>
Date: Fri, 12 Nov 2021 06:08:55 -0500
Subject: [PATCH 08/76] bios-tables-test: Allow changes in DSDT ACPI tables

Prepare for changing the _OSC method in q35 DSDT.

Signed-off-by: Julia Suvorova <jusual@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: Ani Sinha <ani@anisinha.ca>
Message-Id: <20211112110857.3116853-4-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tests/qtest/bios-tables-test-allowed-diff.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..48e5634d4b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,17 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/q35/DSDT",
+"tests/data/acpi/q35/DSDT.tis",
+"tests/data/acpi/q35/DSDT.bridge",
+"tests/data/acpi/q35/DSDT.mmio64",
+"tests/data/acpi/q35/DSDT.ipmibt",
+"tests/data/acpi/q35/DSDT.cphp",
+"tests/data/acpi/q35/DSDT.memhp",
+"tests/data/acpi/q35/DSDT.acpihmat",
+"tests/data/acpi/q35/DSDT.numamem",
+"tests/data/acpi/q35/DSDT.dimmpxm",
+"tests/data/acpi/q35/DSDT.nohpet",
+"tests/data/acpi/q35/DSDT.tis.tpm2",
+"tests/data/acpi/q35/DSDT.tis.tpm12",
+"tests/data/acpi/q35/DSDT.multi-bridge",
+"tests/data/acpi/q35/DSDT.ivrs",
+"tests/data/acpi/q35/DSDT.xapic",

From 211afe5c69b597acf85fdd577eb497f5be1ffbd8 Mon Sep 17 00:00:00 2001
From: Julia Suvorova <jusual@redhat.com>
Date: Fri, 12 Nov 2021 06:08:56 -0500
Subject: [PATCH 09/76] hw/i386/acpi-build: Deny control on PCIe Native
 Hot-plug in _OSC

There are two ways to enable ACPI PCI Hot-plug:

        * Disable the Hot-plug Capable bit on PCIe slots.

This was the first approach which led to regression [1-2], as
I/O space for a port is allocated only when it is hot-pluggable,
which is determined by HPC bit.

        * Leave the HPC bit on and disable PCIe Native Hot-plug in _OSC
          method.

This removes the (future) ability of hot-plugging switches with PCIe
Native hotplug since ACPI PCI Hot-plug only works with cold-plugged
bridges. If the user wants to explicitely use this feature, they can
disable ACPI PCI Hot-plug with:
        --global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off

Change the bit in _OSC method so that the OS selects ACPI PCI Hot-plug
instead of PCIe Native.

[1] https://gitlab.com/qemu-project/qemu/-/issues/641
[2] https://bugzilla.redhat.com/show_bug.cgi?id=2006409

Signed-off-by: Julia Suvorova <jusual@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20211112110857.3116853-5-imammedo@redhat.com>
Reviewed-by: Ani Sinha <ani@anisinha.ca>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/acpi-build.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index a3ad6abd33..a99c6e4fe3 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1337,7 +1337,7 @@ static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr)
     aml_append(table, scope);
 }
 
-static Aml *build_q35_osc_method(void)
+static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug)
 {
     Aml *if_ctx;
     Aml *if_ctx2;
@@ -1359,8 +1359,10 @@ static Aml *build_q35_osc_method(void)
     /*
      * Always allow native PME, AER (no dependencies)
      * Allow SHPC (PCI bridges can have SHPC controller)
+     * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled.
      */
-    aml_append(if_ctx, aml_and(a_ctrl, aml_int(0x1F), a_ctrl));
+    aml_append(if_ctx, aml_and(a_ctrl,
+        aml_int(0x1E | (enable_native_pcie_hotplug ? 0x1 : 0x0)), a_ctrl));
 
     if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1))));
     /* Unknown revision */
@@ -1449,7 +1451,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
         aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
         aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
         aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid)));
-        aml_append(dev, build_q35_osc_method());
+        aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en));
         aml_append(sb_scope, dev);
         if (mcfg_valid) {
             aml_append(sb_scope, build_q35_dram_controller(&mcfg));
@@ -1565,7 +1567,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
             if (pci_bus_is_express(bus)) {
                 aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
                 aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
-                aml_append(dev, build_q35_osc_method());
+
+                /* Expander bridges do not have ACPI PCI Hot-plug enabled */
+                aml_append(dev, build_q35_osc_method(true));
             } else {
                 aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
             }

From 7e6055c99f2f1f5e6a206e8d07a9f45508832611 Mon Sep 17 00:00:00 2001
From: Igor Mammedov <imammedo@redhat.com>
Date: Fri, 12 Nov 2021 06:08:57 -0500
Subject: [PATCH 10/76] tests: bios-tables-test update expected blobs

The changes are the result of
        'hw/i386/acpi-build: Deny control on PCIe Native Hot-Plug in _OSC'
which hides PCIE hotplug bit in host-bridge _OSC

Method (_OSC, 4, NotSerialized)  // _OSC: Operating System Capabilities
             {
                 CreateDWordField (Arg3, Zero, CDW1)
                 If ((Arg0 == ToUUID ("33db4d5b-1ff7-401c-9657-7441c03dd766") /* PCI Host Bridge Device */))
                 {
                     CreateDWordField (Arg3, 0x04, CDW2)
                     CreateDWordField (Arg3, 0x08, CDW3)
                     Local0 = CDW3 /* \_SB_.PCI0._OSC.CDW3 */
-                    Local0 &= 0x1F
+                    Local0 &= 0x1E

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20211112110857.3116853-6-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tests/data/acpi/q35/DSDT                    | Bin 8289 -> 8289 bytes
 tests/data/acpi/q35/DSDT.acpihmat           | Bin 9614 -> 9614 bytes
 tests/data/acpi/q35/DSDT.bridge             | Bin 11003 -> 11003 bytes
 tests/data/acpi/q35/DSDT.cphp               | Bin 8753 -> 8753 bytes
 tests/data/acpi/q35/DSDT.dimmpxm            | Bin 9943 -> 9943 bytes
 tests/data/acpi/q35/DSDT.ipmibt             | Bin 8364 -> 8364 bytes
 tests/data/acpi/q35/DSDT.ivrs               | Bin 8306 -> 8306 bytes
 tests/data/acpi/q35/DSDT.memhp              | Bin 9648 -> 9648 bytes
 tests/data/acpi/q35/DSDT.mmio64             | Bin 9419 -> 9419 bytes
 tests/data/acpi/q35/DSDT.multi-bridge       | Bin 8583 -> 8583 bytes
 tests/data/acpi/q35/DSDT.nohpet             | Bin 8147 -> 8147 bytes
 tests/data/acpi/q35/DSDT.numamem            | Bin 8295 -> 8295 bytes
 tests/data/acpi/q35/DSDT.tis.tpm12          | Bin 8894 -> 8894 bytes
 tests/data/acpi/q35/DSDT.tis.tpm2           | Bin 8894 -> 8894 bytes
 tests/data/acpi/q35/DSDT.xapic              | Bin 35652 -> 35652 bytes
 tests/qtest/bios-tables-test-allowed-diff.h |  16 ----------------
 16 files changed, 16 deletions(-)

diff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT
index 281fc82c03b2562d2e6b7caec0d817b034a47138..c1965f6051ef2af81dd8412abe169d87845bb033 100644
GIT binary patch
delta 24
gcmaFp@X&$FCD<h-QGtPh@z+GID~xg*?>ET<0BnZ{w*UYD

delta 24
gcmaFp@X&$FCD<h-QGtPh@#jRYD~$3R?>ET<0BnK?w*UYD

diff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat
index 8c1e05a11a328ec1cc6f86e36e52c28f41f9744e..f24d4874bff8d327a165ed7c36de507aea114edd 100644
GIT binary patch
delta 24
fcmeD4?(^ny33dtTQ)OUa+&+=(3ZvY{`|DKzU@Hhn

delta 24
fcmeD4?(^ny33dtTQ)OUa+%}Qx3ZwkS`|DKzU?vDi

diff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge
index 6f1464b6c712d7f33cb4b891b7ce76fe228f44c9..424d51bd1cb39ea73501ef7d0044ee52cec5bdac 100644
GIT binary patch
delta 24
gcmewz`a6`%CD<k8w-y5fBg;gtD~xg*@5^Wb0CWThXaE2J

delta 24
gcmewz`a6`%CD<k8w-y5fBlASAD~$3R@5^Wb0CWEcXaE2J

diff --git a/tests/data/acpi/q35/DSDT.cphp b/tests/data/acpi/q35/DSDT.cphp
index f8337ff5191a37a47dcf7c09a6c39c4e704a15bf..f1275606f68eeba54bfb11e63d818420385a62b9 100644
GIT binary patch
delta 24
fcmdn!veAXhCD<jzP>F$oF>WH)6-K#@_k$DxTWtqt

delta 24
fcmdn!veAXhCD<jzP>F$oF?J%?6-N1u_k$DxTWAMo

diff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm
index fe5820d93d057ef09a001662369b15afbc5b87e2..76e451e829ec4c245315f7eed8731aa1be45a747 100644
GIT binary patch
delta 24
gcmccad)=4ICD<k8x*7umqsK(9D~xg*@BdH*0B$o00{{R3

delta 24
gcmccad)=4ICD<k8x*7umqx(dzD~$3R@BdH*0B$Y`0{{R3

diff --git a/tests/data/acpi/q35/DSDT.ipmibt b/tests/data/acpi/q35/DSDT.ipmibt
index 631741065860fd5036aa303904dabd1d2839f9c6..6ad2411d0ec95f204cfc64b430c537bce09c35bd 100644
GIT binary patch
delta 24
gcmZ4ExW<voCD<iojRFG$quxZWD~xg*@1K<i0AMf(82|tP

delta 24
gcmZ4ExW<voCD<iojRFG$qwYkmD~$3R@1K<i0AMQ!82|tP

diff --git a/tests/data/acpi/q35/DSDT.ivrs b/tests/data/acpi/q35/DSDT.ivrs
index b0eafe90e5832935557ec5e6802c0147c88f379c..cad26e3f0c27a40a33101155a5282ed9bcb1d441 100644
GIT binary patch
delta 24
gcmez5@X3M8CD<jTNP&TYan?kxD~xg*?@yKo0BrUMn*aa+

delta 24
gcmez5@X3M8CD<jTNP&TYappv>D~$3R?@yKo0BrFHn*aa+

diff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp
index 9bc11518fc57687ca789dc70793b48b29a0d74ed..4e9cb3dc6896bb79ccac0fe342a404549f6610e8 100644
GIT binary patch
delta 24
gcmdnsy}_HyCD<iogDL|9<C}?GR~Y3s-oK~<0BV~F1poj5

delta 24
gcmdnsy}_HyCD<iogDL|9<LilBR~Y3t-oK~<0BV*A1poj5

diff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/q35/DSDT.mmio64
index 713288a12ef2b70a20b4e8836c036ba4db3a57a8..eb5a1c7171c02d153487bfcecfb7019b7c1bf406 100644
GIT binary patch
delta 24
gcmX@@dD@f9CD<k8v<d?Q<BEw~R~Y3s-hZPE0B$h}CjbBd

delta 24
gcmX@@dD@f9CD<k8v<d?Q<MN4IR~Y3t-hZPE0B$S^CjbBd

diff --git a/tests/data/acpi/q35/DSDT.multi-bridge b/tests/data/acpi/q35/DSDT.multi-bridge
index a24c713d22102a1a1583b5c902edffe1694e5cfe..45808eb03b78d07ebbe853f674abfed589d35e26 100644
GIT binary patch
delta 24
fcmZp7Zg=K#33dr-S7cyd?3>7Sg;8$f{S^uTTRaEr

delta 24
fcmZp7Zg=K#33dr-S7cyd?48JUg;9Rv{S^uTTQ>*m

diff --git a/tests/data/acpi/q35/DSDT.nohpet b/tests/data/acpi/q35/DSDT.nohpet
index e8202e6ddfbe96071f32f1ec05758f650569943e..83d1aa00ac5686df479673fb0d7830f946e25dea 100644
GIT binary patch
delta 24
gcmca?f7zbPCD<k8vOEI=<B^G6R~Y3s-v1&80B=4CMF0Q*

delta 24
gcmca?f7zbPCD<k8vOEI=<Kc;1R~Y3t-v1&80B<=7MF0Q*

diff --git a/tests/data/acpi/q35/DSDT.numamem b/tests/data/acpi/q35/DSDT.numamem
index 151e7cf42953f3f5fe61ff0140ab7b976fe9e5b8..050aaa237b466b0bda8cca5cfaa06f84661d622e 100644
GIT binary patch
delta 24
gcmaFv@Z5pRCD<h-U4emtamhrkD~xg*?{~-p0BWTOc>n+a

delta 24
gcmaFv@Z5pRCD<h-U4emtaq&d1D~$3R?{~-p0BWEJc>n+a

diff --git a/tests/data/acpi/q35/DSDT.tis.tpm12 b/tests/data/acpi/q35/DSDT.tis.tpm12
index c96b5277a14ae98174408d690d6e0246bd932623..0ebdf6fbd77967f1ab5d5337b7b1fed314cfaca8 100644
GIT binary patch
delta 24
gcmdnzy3du%CD<iopArKDqxwXyD~xg*@84Gh0A(f!W&i*H

delta 24
gcmdnzy3du%CD<iopArKDquNBSD~$3R@84Gh0A(QvW&i*H

diff --git a/tests/data/acpi/q35/DSDT.tis.tpm2 b/tests/data/acpi/q35/DSDT.tis.tpm2
index c92d4d29c79352a60974ea9f665d0b9a410a4bac..dcbb7f0af377425db53130e8ba1c62c09c22e006 100644
GIT binary patch
delta 24
gcmdnzy3du%CD<iopArKD<D-dOR~Y3s-oLL10Bm##ApigX

delta 24
gcmdnzy3du%CD<iopArKD<HLzuR~Y3t-oLL10BmmwApigX

diff --git a/tests/data/acpi/q35/DSDT.xapic b/tests/data/acpi/q35/DSDT.xapic
index 119fc90f1f8a7b6934df6fd95609446e627ce15d..17552ce363ae81985f69f9ae85837a1540b79ae0 100644
GIT binary patch
delta 26
icmX>yjp@iVCN7s?mk^h31_s6r6S=N1%5A)#+64f6_X&Rh

delta 26
icmX>yjp@iVCN7s?mk^h31_s9U6S=N1%5S`%+64f6@(F(c

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index 48e5634d4b..dfb8523c8b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1,17 +1 @@
 /* List of comma-separated changed AML files to ignore */
-"tests/data/acpi/q35/DSDT",
-"tests/data/acpi/q35/DSDT.tis",
-"tests/data/acpi/q35/DSDT.bridge",
-"tests/data/acpi/q35/DSDT.mmio64",
-"tests/data/acpi/q35/DSDT.ipmibt",
-"tests/data/acpi/q35/DSDT.cphp",
-"tests/data/acpi/q35/DSDT.memhp",
-"tests/data/acpi/q35/DSDT.acpihmat",
-"tests/data/acpi/q35/DSDT.numamem",
-"tests/data/acpi/q35/DSDT.dimmpxm",
-"tests/data/acpi/q35/DSDT.nohpet",
-"tests/data/acpi/q35/DSDT.tis.tpm2",
-"tests/data/acpi/q35/DSDT.tis.tpm12",
-"tests/data/acpi/q35/DSDT.multi-bridge",
-"tests/data/acpi/q35/DSDT.ivrs",
-"tests/data/acpi/q35/DSDT.xapic",

From f463e761a41ee71e59892121e1c74d9c25c985d2 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 11 Nov 2021 14:38:53 +0800
Subject: [PATCH 11/76] virtio: use virtio accessor to access packed descriptor
 flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We used to access packed descriptor flags via
address_space_{write|read}_cached(). When we hit the cache, memcpy()
is used which is not an atomic operation which may lead a wrong value
is read or wrote.

So this patch switches to use virito_{stw|lduw}_phys_cached() to make
sure the aceess is atomic.

Fixes: 86044b24e865f ("virtio: basic packed virtqueue support")
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20211111063854.29060-1-jasowang@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/virtio.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index cc69a9b881..939bcbfeb9 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -507,11 +507,9 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev,
                                          MemoryRegionCache *cache,
                                          int i)
 {
-    address_space_read_cached(cache,
-                              i * sizeof(VRingPackedDesc) +
-                              offsetof(VRingPackedDesc, flags),
-                              flags, sizeof(*flags));
-    virtio_tswap16s(vdev, flags);
+    hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
+
+    *flags = virtio_lduw_phys_cached(vdev, cache, off);
 }
 
 static void vring_packed_desc_read(VirtIODevice *vdev,
@@ -564,8 +562,7 @@ static void vring_packed_desc_write_flags(VirtIODevice *vdev,
 {
     hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
 
-    virtio_tswap16s(vdev, &desc->flags);
-    address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags));
+    virtio_stw_phys_cached(vdev, cache, off, desc->flags);
     address_space_cache_invalidate(cache, off, sizeof(desc->flags));
 }
 

From d152cdd6f6fad381e804c8185f0ba938030ccac9 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 11 Nov 2021 14:38:54 +0800
Subject: [PATCH 12/76] virtio: use virtio accessor to access packed event
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We used to access packed descriptor event and off_wrap via
address_space_{write|read}_cached(). When we hit the cache, memcpy()
is used which is not atomic which may lead a wrong value to be read or
wrote.

This patch fixes this by switching to use
virito_{stw|lduw}_phys_cached() to make sure the access is atomic.

Fixes: 683f7665679c1 ("virtio: event suppression support for packed ring")
Cc: qemu-stable@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20211111063854.29060-2-jasowang@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/virtio.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 939bcbfeb9..ea7c079fb0 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -247,13 +247,10 @@ static void vring_packed_event_read(VirtIODevice *vdev,
     hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
     hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
 
-    address_space_read_cached(cache, off_flags, &e->flags,
-                              sizeof(e->flags));
+    e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
     /* Make sure flags is seen before off_wrap */
     smp_rmb();
-    address_space_read_cached(cache, off_off, &e->off_wrap,
-                              sizeof(e->off_wrap));
-    virtio_tswap16s(vdev, &e->off_wrap);
+    e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
     virtio_tswap16s(vdev, &e->flags);
 }
 
@@ -263,8 +260,7 @@ static void vring_packed_off_wrap_write(VirtIODevice *vdev,
 {
     hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
 
-    virtio_tswap16s(vdev, &off_wrap);
-    address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap));
+    virtio_stw_phys_cached(vdev, cache, off, off_wrap);
     address_space_cache_invalidate(cache, off, sizeof(off_wrap));
 }
 
@@ -273,8 +269,7 @@ static void vring_packed_flags_write(VirtIODevice *vdev,
 {
     hwaddr off = offsetof(VRingPackedDescEvent, flags);
 
-    virtio_tswap16s(vdev, &flags);
-    address_space_write_cached(cache, off, &flags, sizeof(flags));
+    virtio_stw_phys_cached(vdev, cache, off, flags);
     address_space_cache_invalidate(cache, off, sizeof(flags));
 }
 

From 0351152b6f16e23e65407b4c4b27e23b46aa3801 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Fri, 12 Nov 2021 20:34:30 +0100
Subject: [PATCH 13/76] vdpa: Replace qemu_open_old by qemu_open at
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no reason to keep using the old one, since we neither use the
variadics arguments nor open it with O_DIRECT.

Also, net_client_init1, the caller of net_init_vhost_vdpa, wants all
net_client_init_fun to use Error API, so it's a good step in that
direction.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <20211112193431.2379298-2-eperezma@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 net/vhost-vdpa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 373b706b90..1a7250b980 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -261,7 +261,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
     opts = &netdev->u.vhost_vdpa;
 
-    vdpa_device_fd = qemu_open_old(opts->vhostdev, O_RDWR);
+    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
     if (vdpa_device_fd == -1) {
         return -errno;
     }

From c829540401f280f0cad24131f4e7b92e81d506fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Fri, 12 Nov 2021 20:34:31 +0100
Subject: [PATCH 14/76] vdpa: Check for existence of opts.vhostdev
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since net_init_vhost_vdpa is trying to open it. Not specifying it in the
command line crash qemu.

Fixes: 7327813d17 ("vhost-vdpa: open device fd in net_init_vhost_vdpa()")
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <20211112193431.2379298-3-eperezma@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 net/vhost-vdpa.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 1a7250b980..2e3c22a8c7 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -260,6 +260,10 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
 
     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
     opts = &netdev->u.vhost_vdpa;
+    if (!opts->vhostdev) {
+        error_setg(errp, "vdpa character device not specified with vhostdev");
+        return -1;
+    }
 
     vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
     if (vdpa_device_fd == -1) {

From 23786d13441d36e0efcfee94ba8ff218746bed6c Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Thu, 11 Nov 2021 14:08:54 +0100
Subject: [PATCH 15/76] pci: implement power state

This allows to power off pci devices.  In "off" state the devices will
not be visible.  No pci config space access, no pci bar access, no dma.

Default state is "on", so this patch (alone) should not change behavior.

Use case:  Allows hotplug controllers implement slot power.  Hotplug
controllers doing so should set the inital power state for devices in
the ->plug callback.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-Id: <20211111130859.1171890-2-kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/pci.c         | 25 +++++++++++++++++++++++--
 hw/pci/pci_host.c    |  6 ++++--
 include/hw/pci/pci.h |  2 ++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 4a84e478ce..e5993c1ef5 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1380,6 +1380,9 @@ static void pci_update_mappings(PCIDevice *d)
             continue;
 
         new_addr = pci_bar_address(d, i, r->type, r->size);
+        if (!d->has_power) {
+            new_addr = PCI_BAR_UNMAPPED;
+        }
 
         /* This bar isn't changed */
         if (new_addr == r->addr)
@@ -1464,8 +1467,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
     if (range_covers_byte(addr, l, PCI_COMMAND)) {
         pci_update_irq_disabled(d, was_irq_disabled);
         memory_region_set_enabled(&d->bus_master_enable_region,
-                                  pci_get_word(d->config + PCI_COMMAND)
-                                    & PCI_COMMAND_MASTER);
+                                  (pci_get_word(d->config + PCI_COMMAND)
+                                   & PCI_COMMAND_MASTER) && d->has_power);
     }
 
     msi_write_config(d, addr, val_in, l);
@@ -2182,6 +2185,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
         pci_qdev_unrealize(DEVICE(pci_dev));
         return;
     }
+
+    pci_set_power(pci_dev, true);
 }
 
 PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
@@ -2853,6 +2858,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
     return msg;
 }
 
+void pci_set_power(PCIDevice *d, bool state)
+{
+    if (d->has_power == state) {
+        return;
+    }
+
+    d->has_power = state;
+    pci_update_mappings(d);
+    memory_region_set_enabled(&d->bus_master_enable_region,
+                              (pci_get_word(d->config + PCI_COMMAND)
+                               & PCI_COMMAND_MASTER) && d->has_power);
+    if (!d->has_power) {
+        pci_device_reset(d);
+    }
+}
+
 static const TypeInfo pci_device_type_info = {
     .name = TYPE_PCI_DEVICE,
     .parent = TYPE_DEVICE,
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index cf02f0d6a5..7beafd40a8 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -74,7 +74,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
     /* non-zero functions are only exposed when function 0 is present,
      * allowing direct removal of unexposed functions.
      */
-    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+    if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
+        !pci_dev->has_power) {
         return;
     }
 
@@ -97,7 +98,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
     /* non-zero functions are only exposed when function 0 is present,
      * allowing direct removal of unexposed functions.
      */
-    if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+    if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
+        !pci_dev->has_power) {
         return ~0x0;
     }
 
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 5c4016b995..e7cdf2d5ec 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -268,6 +268,7 @@ typedef struct PCIReqIDCache PCIReqIDCache;
 struct PCIDevice {
     DeviceState qdev;
     bool partially_hotplugged;
+    bool has_power;
 
     /* PCI config space */
     uint8_t *config;
@@ -908,5 +909,6 @@ extern const VMStateDescription vmstate_pci_device;
 }
 
 MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
+void pci_set_power(PCIDevice *pci_dev, bool state);
 
 #endif

From d5daff7d312653b92f23c7a8e198090b32b8dae6 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Thu, 11 Nov 2021 14:08:55 +0100
Subject: [PATCH 16/76] pcie: implement slot power control for pcie root ports

With this patch hot-plugged pci devices will only be visible to the
guest if the guests hotplug driver has enabled slot power.

This should fix the hot-plug race which one can hit when hot-plugging
a pci device at boot, while the guest is in the middle of the pci bus
scan.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-Id: <20211111130859.1171890-3-kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/pcie.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 914a9bf3d1..13d11a57c7 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -366,6 +366,29 @@ static void hotplug_event_clear(PCIDevice *dev)
     }
 }
 
+static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque)
+{
+    bool *power = opaque;
+
+    pci_set_power(dev, *power);
+}
+
+static void pcie_cap_update_power(PCIDevice *hotplug_dev)
+{
+    uint8_t *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap;
+    PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(hotplug_dev));
+    uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP);
+    uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
+    bool power = true;
+
+    if (sltcap & PCI_EXP_SLTCAP_PCP) {
+        power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON;
+    }
+
+    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                        pcie_set_power_device, &power);
+}
+
 /*
  * A PCI Express Hot-Plug Event has occurred, so update slot status register
  * and notify OS of the event if necessary.
@@ -434,6 +457,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
             pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
                                        PCI_EXP_LNKSTA_DLLLA);
         }
+        pcie_cap_update_power(hotplug_pdev);
         return;
     }
 
@@ -451,6 +475,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
         }
         pcie_cap_slot_event(hotplug_pdev,
                             PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
+        pcie_cap_update_power(hotplug_pdev);
     }
 }
 
@@ -625,6 +650,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
                                  PCI_EXP_SLTSTA_PDC |
                                  PCI_EXP_SLTSTA_ABP);
 
+    pcie_cap_update_power(dev);
     hotplug_event_update_event_status(dev);
 }
 
@@ -705,6 +731,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
         pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
                                        PCI_EXP_SLTSTA_PDC);
     }
+    pcie_cap_update_power(dev);
 
     hotplug_event_notify(dev);
 
@@ -731,6 +758,7 @@ int pcie_cap_slot_post_load(void *opaque, int version_id)
 {
     PCIDevice *dev = opaque;
     hotplug_event_update_event_status(dev);
+    pcie_cap_update_power(dev);
     return 0;
 }
 

From 81124b3c7a5dae34881cd8cd9631f125da81ef97 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Thu, 11 Nov 2021 14:08:56 +0100
Subject: [PATCH 17/76] pcie: add power indicator blink check

Refuse to push the attention button in case the guest is busy with some
hotplug operation (as indicated by the power indicator blinking).

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-Id: <20211111130859.1171890-4-kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/pcie.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 13d11a57c7..b92dbff118 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -506,6 +506,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
     PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev);
     uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap;
     uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP);
+    uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
 
     /* Check if hot-unplug is disabled on the slot */
     if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) {
@@ -521,6 +522,12 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
         return;
     }
 
+    if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK) {
+        error_setg(errp, "Hot-unplug failed: "
+                   "guest is busy (power indicator blinking)");
+        return;
+    }
+
     dev->pending_deleted_event = true;
 
     /* In case user cancel the operation of multi-function hot-add,

From 44242d4d3d082a28200fdbecaed5398122682e6e Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Thu, 11 Nov 2021 14:08:57 +0100
Subject: [PATCH 18/76] pcie: factor out pcie_cap_slot_unplug()

No functional change.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-Id: <20211111130859.1171890-5-kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/pcie.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index b92dbff118..959bf074b2 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -497,6 +497,25 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
     object_unparent(OBJECT(dev));
 }
 
+static void pcie_cap_slot_do_unplug(PCIDevice *dev)
+{
+    PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
+    uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+    uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP);
+
+    pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL);
+
+    pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
+                                 PCI_EXP_SLTSTA_PDS);
+    if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA ||
+        (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) {
+        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+                                     PCI_EXP_LNKSTA_DLLLA);
+    }
+    pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
+                               PCI_EXP_SLTSTA_PDC);
+}
+
 void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
                                      DeviceState *dev, Error **errp)
 {
@@ -676,7 +695,6 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
     uint32_t pos = dev->exp.exp_cap;
     uint8_t *exp_cap = dev->config + pos;
     uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
-    uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP);
 
     if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
         /*
@@ -726,17 +744,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
         (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
         (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
         (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
-        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
-        pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL);
-        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
-                                     PCI_EXP_SLTSTA_PDS);
-        if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA ||
-            (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) {
-            pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
-                                         PCI_EXP_LNKSTA_DLLLA);
-        }
-        pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
-                                       PCI_EXP_SLTSTA_PDC);
+        pcie_cap_slot_do_unplug(dev);
     }
     pcie_cap_update_power(dev);
 

From 0d33415a4eafe532f3beef8272811d927236a353 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Thu, 11 Nov 2021 14:08:58 +0100
Subject: [PATCH 19/76] pcie: fast unplug when slot power is off

In case the slot is powered off (and the power indicator turned off too)
we can unplug right away, without round-trip to the guest.

Also clear pending attention button press, there is nothing to care
about any more.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-Id: <20211111130859.1171890-6-kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/pcie.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 959bf074b2..a930ac738a 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -560,6 +560,16 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
         return;
     }
 
+    if (((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) &&
+        ((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF)) {
+        /* slot is powered off -> unplug without round-trip to the guest */
+        pcie_cap_slot_do_unplug(hotplug_pdev);
+        hotplug_event_notify(hotplug_pdev);
+        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
+                                     PCI_EXP_SLTSTA_ABP);
+        return;
+    }
+
     pcie_cap_slot_push_attention_button(hotplug_pdev);
 }
 

From 18416c62e36a79823a9e28f6b2260aa13c25e1d9 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Thu, 11 Nov 2021 14:08:59 +0100
Subject: [PATCH 20/76] pcie: expire pending delete

Add an expire time for pending delete, once the time is over allow
pressing the attention button again.

This makes pcie hotplug behave more like acpi hotplug, where one can
try sending an 'device_del' monitor command again in case the guest
didn't respond to the first attempt.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-Id: <20211111130859.1171890-7-kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/pcie.c          | 2 ++
 include/hw/qdev-core.h | 1 +
 softmmu/qdev-monitor.c | 4 +++-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index a930ac738a..c5ed266337 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -548,6 +548,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
     }
 
     dev->pending_deleted_event = true;
+    dev->pending_deleted_expires_ms =
+        qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */
 
     /* In case user cancel the operation of multi-function hot-add,
      * remove the function that is unexposed to guest individually,
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 72622bd337..20d3066595 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -181,6 +181,7 @@ struct DeviceState {
     char *canonical_path;
     bool realized;
     bool pending_deleted_event;
+    int64_t pending_deleted_expires_ms;
     QDict *opts;
     int hotplugged;
     bool allow_unplug_during_migration;
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index 588a62b88d..5925f1ae5f 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -943,7 +943,9 @@ void qmp_device_del(const char *id, Error **errp)
 {
     DeviceState *dev = find_device_state(id, errp);
     if (dev != NULL) {
-        if (dev->pending_deleted_event) {
+        if (dev->pending_deleted_event &&
+            (dev->pending_deleted_expires_ms == 0 ||
+             dev->pending_deleted_expires_ms > qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) {
             error_setg(errp, "Device %s is already in the "
                              "process of unplug", id);
             return;

From 01b5ab8cc08ab0afb6574f46a4d966725a00a1de Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 30 Sep 2021 16:08:40 +0100
Subject: [PATCH 21/76] hw/intc/arm_gicv3: Move checking of redist-region-count
 to arm_gicv3_common_realize

The GICv3 devices have an array property redist-region-count.
Currently we check this for errors (bad values) in
gicv3_init_irqs_and_mmio(), just before we use it.  Move this error
checking to the arm_gicv3_common_realize() function, where we
sanity-check all of the other base-class properties. (This will
always be before gicv3_init_irqs_and_mmio() is called, because
that function is called in the subclass realize methods, after
they have called the parent-class realize.)

The motivation for this refactor is:
 * we would like to use the redist_region_count[] values in
   arm_gicv3_common_realize() in a subsequent patch, so we need
   to have already done the sanity-checking first
 * this removes the only use of the Error** argument to
   gicv3_init_irqs_and_mmio(), so we can remove some error-handling
   boilerplate

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/intc/arm_gicv3.c                |  6 +-----
 hw/intc/arm_gicv3_common.c         | 26 +++++++++++++-------------
 hw/intc/arm_gicv3_kvm.c            |  6 +-----
 include/hw/intc/arm_gicv3_common.h |  2 +-
 4 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c
index 3f24707838..bcf54a5f0a 100644
--- a/hw/intc/arm_gicv3.c
+++ b/hw/intc/arm_gicv3.c
@@ -393,11 +393,7 @@ static void arm_gic_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
+    gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops);
 
     gicv3_init_cpuif(s);
 }
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 223db16fec..8e47809398 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -250,22 +250,11 @@ static const VMStateDescription vmstate_gicv3 = {
 };
 
 void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
-                              const MemoryRegionOps *ops, Error **errp)
+                              const MemoryRegionOps *ops)
 {
     SysBusDevice *sbd = SYS_BUS_DEVICE(s);
-    int rdist_capacity = 0;
     int i;
 
-    for (i = 0; i < s->nb_redist_regions; i++) {
-        rdist_capacity += s->redist_region_count[i];
-    }
-    if (rdist_capacity < s->num_cpu) {
-        error_setg(errp, "Capacity of the redist regions(%d) "
-                   "is less than number of vcpus(%d)",
-                   rdist_capacity, s->num_cpu);
-        return;
-    }
-
     /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU.
      * GPIO array layout is thus:
      *  [0..N-1] spi
@@ -308,7 +297,7 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
 static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
 {
     GICv3State *s = ARM_GICV3_COMMON(dev);
-    int i;
+    int i, rdist_capacity;
 
     /* revision property is actually reserved and currently used only in order
      * to keep the interface compatible with GICv2 code, avoiding extra
@@ -350,6 +339,17 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    rdist_capacity = 0;
+    for (i = 0; i < s->nb_redist_regions; i++) {
+        rdist_capacity += s->redist_region_count[i];
+    }
+    if (rdist_capacity < s->num_cpu) {
+        error_setg(errp, "Capacity of the redist regions(%d) "
+                   "is less than number of vcpus(%d)",
+                   rdist_capacity, s->num_cpu);
+        return;
+    }
+
     s->cpu = g_new0(GICv3CPUState, s->num_cpu);
 
     for (i = 0; i < s->num_cpu; i++) {
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 5c09f00dec..ab58c73306 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -787,11 +787,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
+    gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
 
     for (i = 0; i < s->num_cpu; i++) {
         ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index aa4f0d6770..cb2b0d0ad4 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -306,6 +306,6 @@ struct ARMGICv3CommonClass {
 };
 
 void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
-                              const MemoryRegionOps *ops, Error **errp);
+                              const MemoryRegionOps *ops);
 
 #endif

From 046164155abe7594ad1d8729dbe150e95badeaed Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 30 Sep 2021 16:08:41 +0100
Subject: [PATCH 22/76] hw/intc/arm_gicv3: Set GICR_TYPER.Last correctly when
 nb_redist_regions > 1

The 'Last' bit in the GICR_TYPER GICv3 redistributor register is
supposed to be set to 1 if this is the last redistributor in a series
of contiguous redistributor pages.  Currently we set Last only for
the redistributor for CPU (num_cpu - 1).  This only works if there is
a single redistributor region; if there are multiple redistributor
regions then we need to set the Last bit for the last redistributor
in each region.

This doesn't cause any problems currently because only the KVM GICv3
supports multiple redistributor regions, and it ignores the value in
GICv3State::gicr_typer.  But we need to fix this before we can enable
support for multiple regions in the emulated GICv3.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/intc/arm_gicv3_common.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 8e47809398..8de9205b38 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -297,7 +297,7 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
 static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
 {
     GICv3State *s = ARM_GICV3_COMMON(dev);
-    int i, rdist_capacity;
+    int i, rdist_capacity, cpuidx;
 
     /* revision property is actually reserved and currently used only in order
      * to keep the interface compatible with GICv2 code, avoiding extra
@@ -355,7 +355,6 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < s->num_cpu; i++) {
         CPUState *cpu = qemu_get_cpu(i);
         uint64_t cpu_affid;
-        int last;
 
         s->cpu[i].cpu = cpu;
         s->cpu[i].gic = s;
@@ -375,7 +374,6 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
          *  PLPIS == 0 (physical LPIs not supported)
          */
         cpu_affid = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL);
-        last = (i == s->num_cpu - 1);
 
         /* The CPU mp-affinity property is in MPIDR register format; squash
          * the affinity bytes into 32 bits as the GICR_TYPER has them.
@@ -384,13 +382,22 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
                      (cpu_affid & 0xFFFFFF);
         s->cpu[i].gicr_typer = (cpu_affid << 32) |
             (1 << 24) |
-            (i << 8) |
-            (last << 4);
+            (i << 8);
 
         if (s->lpi_enable) {
             s->cpu[i].gicr_typer |= GICR_TYPER_PLPIS;
         }
     }
+
+    /*
+     * Now go through and set GICR_TYPER.Last for the final
+     * redistributor in each region.
+     */
+    cpuidx = 0;
+    for (i = 0; i < s->nb_redist_regions; i++) {
+        cpuidx += s->redist_region_count[i];
+        s->cpu[cpuidx - 1].gicr_typer |= GICR_TYPER_LAST;
+    }
 }
 
 static void arm_gicv3_finalize(Object *obj)

From e5cba10ee16566a479cebec3890e91df91e33ab5 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 30 Sep 2021 16:08:42 +0100
Subject: [PATCH 23/76] hw/intc/arm_gicv3: Support multiple redistributor
 regions

Our GICv3 QOM interface includes an array property
redist-region-count which allows board models to specify that the
registributor registers are not in a single contiguous range, but
split into multiple pieces.  We implemented this for KVM, but
currently the TCG GICv3 model insists that there is only one region.
You can see the limit being hit with a setup like:
  qemu-system-aarch64 -machine virt,gic-version=3 -smp 124

Add support for split regions to the TCG GICv3.  To do this we switch
from allocating a simple array of MemoryRegions to an array of
GICv3RedistRegion structs so that we can use the GICv3RedistRegion as
the opaque pointer in the MemoryRegion read/write callbacks.  Each
GICv3RedistRegion contains the MemoryRegion, a backpointer allowing
the read/write callback to get hold of the GICv3State, and an index
which allows us to calculate which CPU's redistributor is being
accessed.

Note that arm_gicv3_kvm always passes in NULL as the ops argument
to gicv3_init_irqs_and_mmio(), so the only MemoryRegion read/write
callbacks we need to update to handle this new scheme are the
gicv3_redist_read/write functions used by the emulated GICv3.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/intc/arm_gicv3.c                |  6 -----
 hw/intc/arm_gicv3_common.c         | 15 ++++++++---
 hw/intc/arm_gicv3_kvm.c            |  4 +--
 hw/intc/arm_gicv3_redist.c         | 40 ++++++++++++++++--------------
 include/hw/intc/arm_gicv3_common.h | 12 ++++++++-
 5 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c
index bcf54a5f0a..c6282984b1 100644
--- a/hw/intc/arm_gicv3.c
+++ b/hw/intc/arm_gicv3.c
@@ -387,12 +387,6 @@ static void arm_gic_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    if (s->nb_redist_regions != 1) {
-        error_setg(errp, "VGICv3 redist region number(%d) not equal to 1",
-                   s->nb_redist_regions);
-        return;
-    }
-
     gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops);
 
     gicv3_init_cpuif(s);
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 8de9205b38..9884d2e39b 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -254,6 +254,7 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
 {
     SysBusDevice *sbd = SYS_BUS_DEVICE(s);
     int i;
+    int cpuidx;
 
     /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU.
      * GPIO array layout is thus:
@@ -282,14 +283,20 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler,
                           "gicv3_dist", 0x10000);
     sysbus_init_mmio(sbd, &s->iomem_dist);
 
-    s->iomem_redist = g_new0(MemoryRegion, s->nb_redist_regions);
+    s->redist_regions = g_new0(GICv3RedistRegion, s->nb_redist_regions);
+    cpuidx = 0;
     for (i = 0; i < s->nb_redist_regions; i++) {
         char *name = g_strdup_printf("gicv3_redist_region[%d]", i);
+        GICv3RedistRegion *region = &s->redist_regions[i];
 
-        memory_region_init_io(&s->iomem_redist[i], OBJECT(s),
-                              ops ? &ops[1] : NULL, s, name,
+        region->gic = s;
+        region->cpuidx = cpuidx;
+        cpuidx += s->redist_region_count[i];
+
+        memory_region_init_io(&region->iomem, OBJECT(s),
+                              ops ? &ops[1] : NULL, region, name,
                               s->redist_region_count[i] * GICV3_REDIST_SIZE);
-        sysbus_init_mmio(sbd, &s->iomem_redist[i]);
+        sysbus_init_mmio(sbd, &region->iomem);
         g_free(name);
     }
 }
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index ab58c73306..5ec5ff9ef6 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -825,7 +825,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
                             KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd, 0);
 
     if (!multiple_redist_region_allowed) {
-        kvm_arm_register_device(&s->iomem_redist[0], -1,
+        kvm_arm_register_device(&s->redist_regions[0].iomem, -1,
                                 KVM_DEV_ARM_VGIC_GRP_ADDR,
                                 KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0);
     } else {
@@ -838,7 +838,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
             uint64_t addr_ormask =
                         i | ((uint64_t)s->redist_region_count[i] << 52);
 
-            kvm_arm_register_device(&s->iomem_redist[i], -1,
+            kvm_arm_register_device(&s->redist_regions[i].iomem, -1,
                                     KVM_DEV_ARM_VGIC_GRP_ADDR,
                                     KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION,
                                     s->dev_fd, addr_ormask);
diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c
index 7072bfcbb1..424e7e28a8 100644
--- a/hw/intc/arm_gicv3_redist.c
+++ b/hw/intc/arm_gicv3_redist.c
@@ -425,22 +425,24 @@ static MemTxResult gicr_writell(GICv3CPUState *cs, hwaddr offset,
 MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data,
                               unsigned size, MemTxAttrs attrs)
 {
-    GICv3State *s = opaque;
+    GICv3RedistRegion *region = opaque;
+    GICv3State *s = region->gic;
     GICv3CPUState *cs;
     MemTxResult r;
     int cpuidx;
 
     assert((offset & (size - 1)) == 0);
 
-    /* This region covers all the redistributor pages; there are
-     * (for GICv3) two 64K pages per CPU. At the moment they are
-     * all contiguous (ie in this one region), though we might later
-     * want to allow splitting of redistributor pages into several
-     * blocks so we can support more CPUs.
+    /*
+     * There are (for GICv3) two 64K redistributor pages per CPU.
+     * In some cases the redistributor pages for all CPUs are not
+     * contiguous (eg on the virt board they are split into two
+     * parts if there are too many CPUs to all fit in the same place
+     * in the memory map); if so then the GIC has multiple MemoryRegions
+     * for the redistributors.
      */
-    cpuidx = offset / 0x20000;
-    offset %= 0x20000;
-    assert(cpuidx < s->num_cpu);
+    cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE;
+    offset %= GICV3_REDIST_SIZE;
 
     cs = &s->cpu[cpuidx];
 
@@ -482,22 +484,24 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data,
 MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data,
                                unsigned size, MemTxAttrs attrs)
 {
-    GICv3State *s = opaque;
+    GICv3RedistRegion *region = opaque;
+    GICv3State *s = region->gic;
     GICv3CPUState *cs;
     MemTxResult r;
     int cpuidx;
 
     assert((offset & (size - 1)) == 0);
 
-    /* This region covers all the redistributor pages; there are
-     * (for GICv3) two 64K pages per CPU. At the moment they are
-     * all contiguous (ie in this one region), though we might later
-     * want to allow splitting of redistributor pages into several
-     * blocks so we can support more CPUs.
+    /*
+     * There are (for GICv3) two 64K redistributor pages per CPU.
+     * In some cases the redistributor pages for all CPUs are not
+     * contiguous (eg on the virt board they are split into two
+     * parts if there are too many CPUs to all fit in the same place
+     * in the memory map); if so then the GIC has multiple MemoryRegions
+     * for the redistributors.
      */
-    cpuidx = offset / 0x20000;
-    offset %= 0x20000;
-    assert(cpuidx < s->num_cpu);
+    cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE;
+    offset %= GICV3_REDIST_SIZE;
 
     cs = &s->cpu[cpuidx];
 
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index cb2b0d0ad4..fc38e4b7dc 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -215,13 +215,23 @@ struct GICv3CPUState {
     bool seenbetter;
 };
 
+/*
+ * The redistributor pages might be split into more than one region
+ * on some machine types if there are many CPUs.
+ */
+typedef struct GICv3RedistRegion {
+    GICv3State *gic;
+    MemoryRegion iomem;
+    uint32_t cpuidx; /* index of first CPU this region covers */
+} GICv3RedistRegion;
+
 struct GICv3State {
     /*< private >*/
     SysBusDevice parent_obj;
     /*< public >*/
 
     MemoryRegion iomem_dist; /* Distributor */
-    MemoryRegion *iomem_redist; /* Redistributor Regions */
+    GICv3RedistRegion *redist_regions; /* Redistributor Regions */
     uint32_t *redist_region_count; /* redistributor count within each region */
     uint32_t nb_redist_regions; /* number of redist regions */
 

From 1adf528ec3bdf62ea3b580b7ad562534a3676ff5 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 20 Sep 2021 14:25:35 +0200
Subject: [PATCH 24/76] hw/rtc/pl031: Send RTC_CHANGE QMP event

The PL031 currently is not able to report guest RTC change to the QMP
monitor as opposed to mc146818 or spapr RTCs. This patch adds the call
to qapi_event_send_rtc_change() when the Load Register is written. The
value which is reported corresponds to the difference between the guest
reference time and the reference time kept in softmmu/rtc.c.

For instance adding 20s to the guest RTC value will report 20. Adding
an extra 20s to the guest RTC value will report 20 + 20 = 40.

The inclusion of qapi/qapi-types-misc-target.h in hw/rtl/pl031.c
require to compile the PL031 with specific_ss.add() to avoid
./qapi/qapi-types-misc-target.h:18:13: error: attempt to use poisoned
"TARGET_<ARCH>".

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20210920122535.269988-1-eric.auger@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/rtc/meson.build |  2 +-
 hw/rtc/pl031.c     | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/hw/rtc/meson.build b/hw/rtc/meson.build
index 7cecdee5dd..8fd8d8f9a7 100644
--- a/hw/rtc/meson.build
+++ b/hw/rtc/meson.build
@@ -2,7 +2,7 @@
 softmmu_ss.add(when: 'CONFIG_DS1338', if_true: files('ds1338.c'))
 softmmu_ss.add(when: 'CONFIG_M41T80', if_true: files('m41t80.c'))
 softmmu_ss.add(when: 'CONFIG_M48T59', if_true: files('m48t59.c'))
-softmmu_ss.add(when: 'CONFIG_PL031', if_true: files('pl031.c'))
+specific_ss.add(when: 'CONFIG_PL031', if_true: files('pl031.c'))
 softmmu_ss.add(when: 'CONFIG_TWL92230', if_true: files('twl92230.c'))
 softmmu_ss.add(when: ['CONFIG_ISA_BUS', 'CONFIG_M48T59'], if_true: files('m48t59-isa.c'))
 softmmu_ss.add(when: 'CONFIG_XLNX_ZYNQMP', if_true: files('xlnx-zynqmp-rtc.c'))
diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c
index 2bbb2062ac..e7ced90b02 100644
--- a/hw/rtc/pl031.c
+++ b/hw/rtc/pl031.c
@@ -24,6 +24,7 @@
 #include "qemu/log.h"
 #include "qemu/module.h"
 #include "trace.h"
+#include "qapi/qapi-events-misc-target.h"
 
 #define RTC_DR      0x00    /* Data read register */
 #define RTC_MR      0x04    /* Match register */
@@ -136,10 +137,17 @@ static void pl031_write(void * opaque, hwaddr offset,
     trace_pl031_write(offset, value);
 
     switch (offset) {
-    case RTC_LR:
+    case RTC_LR: {
+        struct tm tm;
+
         s->tick_offset += value - pl031_get_count(s);
+
+        qemu_get_timedate(&tm, s->tick_offset);
+        qapi_event_send_rtc_change(qemu_timedate_diff(&tm));
+
         pl031_set_alarm(s);
         break;
+    }
     case RTC_MR:
         s->mr = value;
         pl031_set_alarm(s);

From 2523a7956519cc13c92b9214d188cd2489d4df2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Thu, 11 Nov 2021 10:17:16 +0100
Subject: [PATCH 25/76] tests/unit/test-smp-parse: Restore MachineClass fields
 after modifying
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a single MachineClass object, registered with
type_register_static(&smp_machine_info). Since the same
object is used multiple times (an MachineState object
is instantiated in both test_generic and test_with_dies),
we should restore its internal state after modifying for
the test purpose.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Tested-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211115145900.2531865-2-philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index cbe0c99049..47774c1ee2 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -512,7 +512,7 @@ static void test_generic(void)
         smp_parse_test(ms, data, true);
     }
 
-    /* Reset the supported min CPUs and max CPUs */
+    /* Force invalid min CPUs and max CPUs */
     mc->min_cpus = 2;
     mc->max_cpus = 511;
 
@@ -523,6 +523,10 @@ static void test_generic(void)
         smp_parse_test(ms, data, false);
     }
 
+    /* Reset the supported min CPUs and max CPUs */
+    mc->min_cpus = MIN_CPUS;
+    mc->max_cpus = MAX_CPUS;
+
     object_unref(obj);
 }
 
@@ -536,6 +540,7 @@ static void test_with_dies(void)
     int i;
 
     smp_machine_class_init(mc);
+    /* Force the SMP compat properties */
     mc->smp_props.dies_supported = true;
 
     for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) {
@@ -575,6 +580,9 @@ static void test_with_dies(void)
         smp_parse_test(ms, data, false);
     }
 
+    /* Restore the SMP compat properties */
+    mc->smp_props.dies_supported = false;
+
     object_unref(obj);
 }
 

From c3440eff4c0b6ec7adf391620393cb9755eab8d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Thu, 11 Nov 2021 09:20:22 +0100
Subject: [PATCH 26/76] tests/unit/test-smp-parse: QOM'ify
 smp_machine_class_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

smp_machine_class_init() is the actual TypeInfo::class_init().
Declare it as such in smp_machine_info, and avoid to call it
manually in each test. Move smp_machine_info definition just
before we register the type to avoid a forward declaration.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Tested-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211115145900.2531865-3-philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 47774c1ee2..3fff2af4e2 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -75,14 +75,6 @@ typedef struct SMPTestData {
     const char *expect_error;
 } SMPTestData;
 
-/* Type info of the tested machine */
-static const TypeInfo smp_machine_info = {
-    .name = TYPE_MACHINE,
-    .parent = TYPE_OBJECT,
-    .class_size = sizeof(MachineClass),
-    .instance_size = sizeof(MachineState),
-};
-
 /*
  * List all the possible valid sub-collections of the generic 5
  * topology parameters (i.e. cpus/maxcpus/sockets/cores/threads),
@@ -480,9 +472,10 @@ static void unsupported_params_init(MachineClass *mc, SMPTestData *data)
     }
 }
 
-/* Reset the related machine properties before each sub-test */
-static void smp_machine_class_init(MachineClass *mc)
+static void machine_base_class_init(ObjectClass *oc, void *data)
 {
+    MachineClass *mc = MACHINE_CLASS(oc);
+
     mc->min_cpus = MIN_CPUS;
     mc->max_cpus = MAX_CPUS;
 
@@ -498,8 +491,6 @@ static void test_generic(void)
     SMPTestData *data = &(SMPTestData){{ }};
     int i;
 
-    smp_machine_class_init(mc);
-
     for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) {
         *data = data_generic_valid[i];
         unsupported_params_init(mc, data);
@@ -539,7 +530,6 @@ static void test_with_dies(void)
     unsigned int num_dies = 2;
     int i;
 
-    smp_machine_class_init(mc);
     /* Force the SMP compat properties */
     mc->smp_props.dies_supported = true;
 
@@ -586,12 +576,24 @@ static void test_with_dies(void)
     object_unref(obj);
 }
 
+/* Type info of the tested machine */
+static const TypeInfo smp_machine_types[] = {
+    {
+        .name           = TYPE_MACHINE,
+        .parent         = TYPE_OBJECT,
+        .class_init     = machine_base_class_init,
+        .class_size     = sizeof(MachineClass),
+        .instance_size  = sizeof(MachineState),
+    }
+};
+
+DEFINE_TYPES(smp_machine_types)
+
 int main(int argc, char *argv[])
 {
-    g_test_init(&argc, &argv, NULL);
-
     module_call_init(MODULE_INIT_QOM);
-    type_register_static(&smp_machine_info);
+
+    g_test_init(&argc, &argv, NULL);
 
     g_test_add_func("/test-smp-parse/generic", test_generic);
     g_test_add_func("/test-smp-parse/with_dies", test_with_dies);

From 7b6d1bc9629f3dd45647ec3418e0606a92dddd48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Thu, 11 Nov 2021 10:27:45 +0100
Subject: [PATCH 27/76] tests/unit/test-smp-parse: Explicit MachineClass name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the MachineClass::name pointer is not explicitly set, it is NULL.
Per the C standard, passing a NULL pointer to printf "%s" format is
undefined. Some implementations display it as 'NULL', other as 'null'.
Since we are comparing the formatted output, we need a stable value.
The easiest is to explicit a machine name string.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Tested-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211115145900.2531865-4-philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 3fff2af4e2..b02450e25a 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -23,6 +23,8 @@
 #define MIN_CPUS 1   /* set the min CPUs supported by the machine as 1 */
 #define MAX_CPUS 512 /* set the max CPUs supported by the machine as 512 */
 
+#define SMP_MACHINE_NAME "TEST-SMP"
+
 /*
  * Used to define the generic 3-level CPU topology hierarchy
  *  -sockets/cores/threads
@@ -307,13 +309,13 @@ static struct SMPTestData data_generic_invalid[] = {
          * should tweak the supported min CPUs to 2 for testing */
         .config = SMP_CONFIG_GENERIC(T, 1, F, 0, F, 0, F, 0, F, 0),
         .expect_error = "Invalid SMP CPUs 1. The min CPUs supported "
-                        "by machine '(null)' is 2",
+                        "by machine '" SMP_MACHINE_NAME "' is 2",
     }, {
         /* config: -smp 512
          * should tweak the supported max CPUs to 511 for testing */
         .config = SMP_CONFIG_GENERIC(T, 512, F, 0, F, 0, F, 0, F, 0),
         .expect_error = "Invalid SMP CPUs 512. The max CPUs supported "
-                        "by machine '(null)' is 511",
+                        "by machine '" SMP_MACHINE_NAME "' is 511",
     },
 };
 
@@ -481,6 +483,8 @@ static void machine_base_class_init(ObjectClass *oc, void *data)
 
     mc->smp_props.prefer_sockets = true;
     mc->smp_props.dies_supported = false;
+
+    mc->name = g_strdup(SMP_MACHINE_NAME);
 }
 
 static void test_generic(void)

From 8d3dd037d947ce96c42c376ef0bb5e5c6ef96cbe Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 15 Nov 2021 15:53:57 +0100
Subject: [PATCH 28/76] stream: Traverse graph after modification

bdrv_cor_filter_drop() modifies the block graph.  That means that other
parties can also modify the block graph before it returns.  Therefore,
we cannot assume that the result of a graph traversal we did before
remains valid afterwards.

We should thus fetch `base` and `unfiltered_base` afterwards instead of
before.

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20211111120829.81329-2-hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-2-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block/stream.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 97bee482dc..e45113aed6 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -54,8 +54,8 @@ static int stream_prepare(Job *job)
 {
     StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
     BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
-    BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
-    BlockDriverState *unfiltered_base = bdrv_skip_filters(base);
+    BlockDriverState *base;
+    BlockDriverState *unfiltered_base;
     Error *local_err = NULL;
     int ret = 0;
 
@@ -63,6 +63,9 @@ static int stream_prepare(Job *job)
     bdrv_cor_filter_drop(s->cor_filter_bs);
     s->cor_filter_bs = NULL;
 
+    base = bdrv_filter_or_cow_bs(s->above_base);
+    unfiltered_base = bdrv_skip_filters(base);
+
     if (bdrv_cow_child(unfiltered_bs)) {
         const char *base_id = NULL, *base_fmt = NULL;
         if (unfiltered_base) {

From a225369bce684c01095be52e1014e3fa91dec210 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 15 Nov 2021 15:53:58 +0100
Subject: [PATCH 29/76] block: Manipulate children list in .attach/.detach

The children list is specific to BDS parents.  We should not modify it
in the general children modification code, but let BDS parents deal with
it in their .attach() and .detach() methods.

This also has the advantage that a BdrvChild is removed from the
children list before its .bs pointer can become NULL.  BDS parents
generally assume that their children's .bs pointer is never NULL, so
this is actually a bug fix.

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20211111120829.81329-3-hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-3-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/block.c b/block.c
index 580cb77a70..ca024ffced 100644
--- a/block.c
+++ b/block.c
@@ -1387,6 +1387,8 @@ static void bdrv_child_cb_attach(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
 
+    QLIST_INSERT_HEAD(&bs->children, child, next);
+
     if (child->role & BDRV_CHILD_COW) {
         bdrv_backing_attach(child);
     }
@@ -1403,6 +1405,8 @@ static void bdrv_child_cb_detach(BdrvChild *child)
     }
 
     bdrv_unapply_subtree_drain(child, bs);
+
+    QLIST_REMOVE(child, next);
 }
 
 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
@@ -2747,7 +2751,7 @@ static void bdrv_child_free(void *opaque)
 static void bdrv_remove_empty_child(BdrvChild *child)
 {
     assert(!child->bs);
-    QLIST_SAFE_REMOVE(child, next);
+    assert(!child->next.le_prev); /* not in children list */
     bdrv_child_free(child);
 }
 
@@ -2913,12 +2917,6 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
         return ret;
     }
 
-    QLIST_INSERT_HEAD(&parent_bs->children, *child, next);
-    /*
-     * child is removed in bdrv_attach_child_common_abort(), so don't care to
-     * abort this change separately.
-     */
-
     return 0;
 }
 
@@ -4851,7 +4849,6 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque)
     BdrvRemoveFilterOrCowChild *s = opaque;
     BlockDriverState *parent_bs = s->child->opaque;
 
-    QLIST_INSERT_HEAD(&parent_bs->children, s->child, next);
     if (s->is_backing) {
         parent_bs->backing = s->child;
     } else {
@@ -4906,7 +4903,6 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
     };
     tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);
 
-    QLIST_SAFE_REMOVE(child, next);
     if (s->is_backing) {
         bs->backing = NULL;
     } else {

From 04c9c3a52c21088e7039956e5a635250fe3eaee6 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 15 Nov 2021 15:53:59 +0100
Subject: [PATCH 30/76] block: Unite remove_empty_child and child_free

Now that bdrv_remove_empty_child() no longer removes the child from the
parent's children list but only checks that it is not in such a list, it
is only a wrapper around bdrv_child_free() that checks that the child is
empty and unused.  That should apply to all children that we free, so
put those checks into bdrv_child_free() and drop
bdrv_remove_empty_child().

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20211111120829.81329-4-hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-4-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/block.c b/block.c
index ca024ffced..19bff4f95c 100644
--- a/block.c
+++ b/block.c
@@ -2740,19 +2740,19 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
     }
 }
 
-static void bdrv_child_free(void *opaque)
-{
-    BdrvChild *c = opaque;
-
-    g_free(c->name);
-    g_free(c);
-}
-
-static void bdrv_remove_empty_child(BdrvChild *child)
+/**
+ * Free the given @child.
+ *
+ * The child must be empty (i.e. `child->bs == NULL`) and it must be
+ * unused (i.e. not in a children list).
+ */
+static void bdrv_child_free(BdrvChild *child)
 {
     assert(!child->bs);
     assert(!child->next.le_prev); /* not in children list */
-    bdrv_child_free(child);
+
+    g_free(child->name);
+    g_free(child);
 }
 
 typedef struct BdrvAttachChildCommonState {
@@ -2786,7 +2786,7 @@ static void bdrv_attach_child_common_abort(void *opaque)
     }
 
     bdrv_unref(bs);
-    bdrv_remove_empty_child(child);
+    bdrv_child_free(child);
     *s->child = NULL;
 }
 
@@ -2859,7 +2859,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs,
 
         if (ret < 0) {
             error_propagate(errp, local_err);
-            bdrv_remove_empty_child(new_child);
+            bdrv_child_free(new_child);
             return ret;
         }
     }
@@ -2925,7 +2925,7 @@ static void bdrv_detach_child(BdrvChild *child)
     BlockDriverState *old_bs = child->bs;
 
     bdrv_replace_child_noperm(child, NULL);
-    bdrv_remove_empty_child(child);
+    bdrv_child_free(child);
 
     if (old_bs) {
         /*

From 265180614189b969058f5e507c8964fc7dfd422f Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 15 Nov 2021 15:54:00 +0100
Subject: [PATCH 31/76] block: Drop detached child from ignore list

bdrv_attach_child_common_abort() restores the parent's AioContext.  To
do so, the child (which was supposed to be attached, but is now detached
again by this abort handler) is added to the ignore list for the
AioContext changing functions.

However, since we modify a BDS's children list in the BdrvChildClass's
.attach and .detach handlers, the child is already effectively detached
from the parent by this point.  We do not need to put it into the ignore
list.

Use this opportunity to clean up the empty line structure: Keep setting
the ignore list, invoking the AioContext function, and freeing the
ignore list in blocks separated by empty lines.

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20211111120829.81329-5-hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-5-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/block.c b/block.c
index 19bff4f95c..c7d5aa5254 100644
--- a/block.c
+++ b/block.c
@@ -2774,14 +2774,16 @@ static void bdrv_attach_child_common_abort(void *opaque)
     }
 
     if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) {
-        GSList *ignore = g_slist_prepend(NULL, child);
+        GSList *ignore;
 
+        /* No need to ignore `child`, because it has been detached already */
+        ignore = NULL;
         child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore,
                                       &error_abort);
         g_slist_free(ignore);
-        ignore = g_slist_prepend(NULL, child);
-        child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore);
 
+        ignore = NULL;
+        child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore);
         g_slist_free(ignore);
     }
 

From be64bbb0149748f3999c49b13976aafb8330ea86 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 15 Nov 2021 15:54:01 +0100
Subject: [PATCH 32/76] block: Pass BdrvChild ** to replace_child_noperm

bdrv_replace_child_noperm() modifies BdrvChild.bs, and can potentially
set it to NULL.  That is dangerous, because BDS parents generally assume
that their children's .bs pointer is never NULL.  We therefore want to
let bdrv_replace_child_noperm() set the corresponding BdrvChild pointer
to NULL, too.

This patch lays the foundation for it by passing a BdrvChild ** pointer
to bdrv_replace_child_noperm() so that it can later use it to NULL the
BdrvChild pointer immediately after setting BdrvChild.bs to NULL.

(We will still need to undertake some intermediate steps, though.)

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20211111120829.81329-6-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-6-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/block.c b/block.c
index c7d5aa5254..d668156eca 100644
--- a/block.c
+++ b/block.c
@@ -87,7 +87,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
 static bool bdrv_recurse_has_child(BlockDriverState *bs,
                                    BlockDriverState *child);
 
-static void bdrv_replace_child_noperm(BdrvChild *child,
+static void bdrv_replace_child_noperm(BdrvChild **child,
                                       BlockDriverState *new_bs);
 static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
                                               BdrvChild *child,
@@ -2270,7 +2270,7 @@ static void bdrv_replace_child_abort(void *opaque)
     BlockDriverState *new_bs = s->child->bs;
 
     /* old_bs reference is transparently moved from @s to @s->child */
-    bdrv_replace_child_noperm(s->child, s->old_bs);
+    bdrv_replace_child_noperm(&s->child, s->old_bs);
     bdrv_unref(new_bs);
 }
 
@@ -2300,7 +2300,7 @@ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
     if (new_bs) {
         bdrv_ref(new_bs);
     }
-    bdrv_replace_child_noperm(child, new_bs);
+    bdrv_replace_child_noperm(&child, new_bs);
     /* old_bs reference is transparently moved from @child to @s */
 }
 
@@ -2672,9 +2672,10 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
     return permissions[qapi_perm];
 }
 
-static void bdrv_replace_child_noperm(BdrvChild *child,
+static void bdrv_replace_child_noperm(BdrvChild **childp,
                                       BlockDriverState *new_bs)
 {
+    BdrvChild *child = *childp;
     BlockDriverState *old_bs = child->bs;
     int new_bs_quiesce_counter;
     int drain_saldo;
@@ -2767,7 +2768,7 @@ static void bdrv_attach_child_common_abort(void *opaque)
     BdrvChild *child = *s->child;
     BlockDriverState *bs = child->bs;
 
-    bdrv_replace_child_noperm(child, NULL);
+    bdrv_replace_child_noperm(s->child, NULL);
 
     if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
         bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort);
@@ -2867,7 +2868,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs,
     }
 
     bdrv_ref(child_bs);
-    bdrv_replace_child_noperm(new_child, child_bs);
+    bdrv_replace_child_noperm(&new_child, child_bs);
 
     *child = new_child;
 
@@ -2922,12 +2923,12 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
     return 0;
 }
 
-static void bdrv_detach_child(BdrvChild *child)
+static void bdrv_detach_child(BdrvChild **childp)
 {
-    BlockDriverState *old_bs = child->bs;
+    BlockDriverState *old_bs = (*childp)->bs;
 
-    bdrv_replace_child_noperm(child, NULL);
-    bdrv_child_free(child);
+    bdrv_replace_child_noperm(childp, NULL);
+    bdrv_child_free(*childp);
 
     if (old_bs) {
         /*
@@ -3033,7 +3034,7 @@ void bdrv_root_unref_child(BdrvChild *child)
     BlockDriverState *child_bs;
 
     child_bs = child->bs;
-    bdrv_detach_child(child);
+    bdrv_detach_child(&child);
     bdrv_unref(child_bs);
 }
 

From 562bda8bb41879eeda0bd484dd3d55134579b28e Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 15 Nov 2021 15:54:02 +0100
Subject: [PATCH 33/76] block: Restructure remove_file_or_backing_child()

As of a future patch, bdrv_replace_child_tran() will take a BdrvChild **
pointer.  Prepare for that by getting such a pointer and using it where
applicable, and (dereferenced) as a parameter for
bdrv_replace_child_tran().

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20211111120829.81329-7-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-7-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/block.c b/block.c
index d668156eca..8da057f800 100644
--- a/block.c
+++ b/block.c
@@ -4887,30 +4887,33 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
                                               BdrvChild *child,
                                               Transaction *tran)
 {
+    BdrvChild **childp;
     BdrvRemoveFilterOrCowChild *s;
 
-    assert(child == bs->backing || child == bs->file);
-
     if (!child) {
         return;
     }
 
+    if (child == bs->backing) {
+        childp = &bs->backing;
+    } else if (child == bs->file) {
+        childp = &bs->file;
+    } else {
+        g_assert_not_reached();
+    }
+
     if (child->bs) {
-        bdrv_replace_child_tran(child, NULL, tran);
+        bdrv_replace_child_tran(*childp, NULL, tran);
     }
 
     s = g_new(BdrvRemoveFilterOrCowChild, 1);
     *s = (BdrvRemoveFilterOrCowChild) {
         .child = child,
-        .is_backing = (child == bs->backing),
+        .is_backing = (childp == &bs->backing),
     };
     tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);
 
-    if (s->is_backing) {
-        bs->backing = NULL;
-    } else {
-        bs->file = NULL;
-    }
+    *childp = NULL;
 }
 
 /*

From 079bff693bc47bf69fb131f87a03c1689e48ed55 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 15 Nov 2021 15:54:03 +0100
Subject: [PATCH 34/76] transactions: Invoke clean() after everything else

Invoke the transaction drivers' .clean() methods only after all
.commit() or .abort() handlers are done.

This makes it easier to have nested transactions where the top-level
transactions pass objects to lower transactions that the latter can
still use throughout their commit/abort phases, while the top-level
transaction keeps a reference that is released in its .clean() method.

(Before this commit, that is also possible, but the top-level
transaction would need to take care to invoke tran_add() before the
lower-level transaction does.  This commit makes the ordering
irrelevant, which is just a bit nicer.)

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20211111120829.81329-8-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-8-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 include/qemu/transactions.h | 3 +++
 util/transactions.c         | 8 ++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h
index 92c5965235..2f2060acd9 100644
--- a/include/qemu/transactions.h
+++ b/include/qemu/transactions.h
@@ -31,6 +31,9 @@
  * tran_create(), call your "prepare" functions on it, and finally call
  * tran_abort() or tran_commit() to finalize the transaction by corresponding
  * finalization actions in reverse order.
+ *
+ * The clean() functions registered by the drivers in a transaction are called
+ * last, after all abort() or commit() functions have been called.
  */
 
 #ifndef QEMU_TRANSACTIONS_H
diff --git a/util/transactions.c b/util/transactions.c
index d0bc9a3e73..2dbdedce95 100644
--- a/util/transactions.c
+++ b/util/transactions.c
@@ -61,11 +61,13 @@ void tran_abort(Transaction *tran)
 {
     TransactionAction *act, *next;
 
-    QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) {
+    QSLIST_FOREACH(act, &tran->actions, entry) {
         if (act->drv->abort) {
             act->drv->abort(act->opaque);
         }
+    }
 
+    QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) {
         if (act->drv->clean) {
             act->drv->clean(act->opaque);
         }
@@ -80,11 +82,13 @@ void tran_commit(Transaction *tran)
 {
     TransactionAction *act, *next;
 
-    QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) {
+    QSLIST_FOREACH(act, &tran->actions, entry) {
         if (act->drv->commit) {
             act->drv->commit(act->opaque);
         }
+    }
 
+    QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) {
         if (act->drv->clean) {
             act->drv->clean(act->opaque);
         }

From 82b54cf51656bf3cd5ed1ac549e8a1085a0e3290 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 15 Nov 2021 15:54:04 +0100
Subject: [PATCH 35/76] block: Let replace_child_tran keep indirect pointer

As of a future commit, bdrv_replace_child_noperm() will clear the
indirect BdrvChild pointer passed to it if the new child BDS is NULL.
bdrv_replace_child_tran() will want to let it do that, but revert this
change in its abort handler.  For that, we need to have it receive a
BdrvChild ** pointer, too, and keep it stored in the
BdrvReplaceChildState object that we attach to the transaction.

Note that we do not need to store it in the BdrvReplaceChildState when
new_bs is not NULL, because then there is nothing to revert.  This is
important so that bdrv_replace_node_noperm() can pass a pointer to a
loop-local variable to bdrv_replace_child_tran() without worrying that
this pointer will outlive one loop iteration.

(Of course, for that to work, bdrv_replace_node_noperm() and in turn
bdrv_replace_node() and its relatives may not be called with a NULL @to
node.  Luckily, they already are not, but now we should assert this.)

bdrv_remove_file_or_backing_child() on the other hand needs to ensure
that the indirect pointer it passes will stay valid for the duration of
the transaction.  Ensure this by keeping a strong reference to the BDS
whose &bs->backing or &bs->file it passes to bdrv_replace_child_tran(),
and giving up that reference only in the transaction .clean() handler.

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20211111120829.81329-9-hreitz@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-9-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 73 insertions(+), 10 deletions(-)

diff --git a/block.c b/block.c
index 8da057f800..a40027161c 100644
--- a/block.c
+++ b/block.c
@@ -2254,6 +2254,7 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,
 
 typedef struct BdrvReplaceChildState {
     BdrvChild *child;
+    BdrvChild **childp;
     BlockDriverState *old_bs;
 } BdrvReplaceChildState;
 
@@ -2269,7 +2270,29 @@ static void bdrv_replace_child_abort(void *opaque)
     BdrvReplaceChildState *s = opaque;
     BlockDriverState *new_bs = s->child->bs;
 
-    /* old_bs reference is transparently moved from @s to @s->child */
+    /*
+     * old_bs reference is transparently moved from @s to s->child.
+     *
+     * Pass &s->child here instead of s->childp, because:
+     * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not
+     *     modify the BdrvChild * pointer we indirectly pass to it, i.e. it
+     *     will not modify s->child.  From that perspective, it does not matter
+     *     whether we pass s->childp or &s->child.
+     *     (TODO: Right now, bdrv_replace_child_noperm() never modifies that
+     *     pointer anyway (though it will in the future), so at this point it
+     *     absolutely does not matter whether we pass s->childp or &s->child.)
+     * (2) If new_bs is not NULL, s->childp will be NULL.  We then cannot use
+     *     it here.
+     * (3) If new_bs is NULL, *s->childp will have been NULLed by
+     *     bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we
+     *     must not pass a NULL *s->childp here.
+     *     (TODO: In its current state, bdrv_replace_child_noperm() will not
+     *     have NULLed *s->childp, so this does not apply yet.  It will in the
+     *     future.)
+     *
+     * So whether new_bs was NULL or not, we cannot pass s->childp here; and in
+     * any case, there is no reason to pass it anyway.
+     */
     bdrv_replace_child_noperm(&s->child, s->old_bs);
     bdrv_unref(new_bs);
 }
@@ -2286,22 +2309,32 @@ static TransactionActionDrv bdrv_replace_child_drv = {
  * Note: real unref of old_bs is done only on commit.
  *
  * The function doesn't update permissions, caller is responsible for this.
+ *
+ * Note that if new_bs == NULL, @childp is stored in a state object attached
+ * to @tran, so that the old child can be reinstated in the abort handler.
+ * Therefore, if @new_bs can be NULL, @childp must stay valid until the
+ * transaction is committed or aborted.
+ *
+ * (TODO: The reinstating does not happen yet, but it will once
+ * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.)
  */
-static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
+static void bdrv_replace_child_tran(BdrvChild **childp,
+                                    BlockDriverState *new_bs,
                                     Transaction *tran)
 {
     BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
     *s = (BdrvReplaceChildState) {
-        .child = child,
-        .old_bs = child->bs,
+        .child = *childp,
+        .childp = new_bs == NULL ? childp : NULL,
+        .old_bs = (*childp)->bs,
     };
     tran_add(tran, &bdrv_replace_child_drv, s);
 
     if (new_bs) {
         bdrv_ref(new_bs);
     }
-    bdrv_replace_child_noperm(&child, new_bs);
-    /* old_bs reference is transparently moved from @child to @s */
+    bdrv_replace_child_noperm(childp, new_bs);
+    /* old_bs reference is transparently moved from *childp to @s */
 }
 
 /*
@@ -4844,6 +4877,7 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to)
 
 typedef struct BdrvRemoveFilterOrCowChild {
     BdrvChild *child;
+    BlockDriverState *bs;
     bool is_backing;
 } BdrvRemoveFilterOrCowChild;
 
@@ -4873,10 +4907,19 @@ static void bdrv_remove_filter_or_cow_child_commit(void *opaque)
     bdrv_child_free(s->child);
 }
 
+static void bdrv_remove_filter_or_cow_child_clean(void *opaque)
+{
+    BdrvRemoveFilterOrCowChild *s = opaque;
+
+    /* Drop the bs reference after the transaction is done */
+    bdrv_unref(s->bs);
+    g_free(s);
+}
+
 static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = {
     .abort = bdrv_remove_filter_or_cow_child_abort,
     .commit = bdrv_remove_filter_or_cow_child_commit,
-    .clean = g_free,
+    .clean = bdrv_remove_filter_or_cow_child_clean,
 };
 
 /*
@@ -4894,6 +4937,11 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
         return;
     }
 
+    /*
+     * Keep a reference to @bs so @childp will stay valid throughout the
+     * transaction (required by bdrv_replace_child_tran())
+     */
+    bdrv_ref(bs);
     if (child == bs->backing) {
         childp = &bs->backing;
     } else if (child == bs->file) {
@@ -4903,12 +4951,13 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
     }
 
     if (child->bs) {
-        bdrv_replace_child_tran(*childp, NULL, tran);
+        bdrv_replace_child_tran(childp, NULL, tran);
     }
 
     s = g_new(BdrvRemoveFilterOrCowChild, 1);
     *s = (BdrvRemoveFilterOrCowChild) {
         .child = child,
+        .bs = bs,
         .is_backing = (childp == &bs->backing),
     };
     tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);
@@ -4934,6 +4983,8 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
 {
     BdrvChild *c, *next;
 
+    assert(to != NULL);
+
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
         assert(c->bs == from);
         if (!should_update_child(c, to)) {
@@ -4949,7 +5000,12 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
                        c->name, from->node_name);
             return -EPERM;
         }
-        bdrv_replace_child_tran(c, to, tran);
+
+        /*
+         * Passing a pointer to the local variable @c is fine here, because
+         * @to is not NULL, and so &c will not be attached to the transaction.
+         */
+        bdrv_replace_child_tran(&c, to, tran);
     }
 
     return 0;
@@ -4964,6 +5020,8 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
  *
  * With @detach_subchain=true @to must be in a backing chain of @from. In this
  * case backing link of the cow-parent of @to is removed.
+ *
+ * @to must not be NULL.
  */
 static int bdrv_replace_node_common(BlockDriverState *from,
                                     BlockDriverState *to,
@@ -4976,6 +5034,8 @@ static int bdrv_replace_node_common(BlockDriverState *from,
     BlockDriverState *to_cow_parent = NULL;
     int ret;
 
+    assert(to != NULL);
+
     if (detach_subchain) {
         assert(bdrv_chain_contains(from, to));
         assert(from != to);
@@ -5031,6 +5091,9 @@ out:
     return ret;
 }
 
+/**
+ * Replace node @from by @to (where neither may be NULL).
+ */
 int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
                       Error **errp)
 {
@@ -5098,7 +5161,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
     bdrv_drained_begin(old_bs);
     bdrv_drained_begin(new_bs);
 
-    bdrv_replace_child_tran(child, new_bs, tran);
+    bdrv_replace_child_tran(&child, new_bs, tran);
 
     found = g_hash_table_new(NULL, NULL);
     refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs);

From b0a9f6fed3d80de610dcd04a7e66f9f30a04174f Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 15 Nov 2021 15:54:05 +0100
Subject: [PATCH 36/76] block: Let replace_child_noperm free children

In most of the block layer, especially when traversing down from other
BlockDriverStates, we assume that BdrvChild.bs can never be NULL.  When
it becomes NULL, it is expected that the corresponding BdrvChild pointer
also becomes NULL and the BdrvChild object is freed.

Therefore, once bdrv_replace_child_noperm() sets the BdrvChild.bs
pointer to NULL, it should also immediately set the corresponding
BdrvChild pointer (like bs->file or bs->backing) to NULL.

In that context, it also makes sense for this function to free the
child.  Sometimes we cannot do so, though, because it is called in a
transactional context where the caller might still want to reinstate the
child in the abort branch (and free it only on commit), so this behavior
has to remain optional.

In bdrv_replace_child_tran()'s abort handler, we now rely on the fact
that the BdrvChild passed to bdrv_replace_child_tran() must have had a
non-NULL .bs pointer initially.  Make a note of that and assert it.

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20211111120829.81329-10-hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-10-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 block.c | 102 +++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 79 insertions(+), 23 deletions(-)

diff --git a/block.c b/block.c
index a40027161c..0ac5b163d2 100644
--- a/block.c
+++ b/block.c
@@ -87,8 +87,10 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
 static bool bdrv_recurse_has_child(BlockDriverState *bs,
                                    BlockDriverState *child);
 
+static void bdrv_child_free(BdrvChild *child);
 static void bdrv_replace_child_noperm(BdrvChild **child,
-                                      BlockDriverState *new_bs);
+                                      BlockDriverState *new_bs,
+                                      bool free_empty_child);
 static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
                                               BdrvChild *child,
                                               Transaction *tran);
@@ -2256,12 +2258,16 @@ typedef struct BdrvReplaceChildState {
     BdrvChild *child;
     BdrvChild **childp;
     BlockDriverState *old_bs;
+    bool free_empty_child;
 } BdrvReplaceChildState;
 
 static void bdrv_replace_child_commit(void *opaque)
 {
     BdrvReplaceChildState *s = opaque;
 
+    if (s->free_empty_child && !s->child->bs) {
+        bdrv_child_free(s->child);
+    }
     bdrv_unref(s->old_bs);
 }
 
@@ -2278,22 +2284,26 @@ static void bdrv_replace_child_abort(void *opaque)
      *     modify the BdrvChild * pointer we indirectly pass to it, i.e. it
      *     will not modify s->child.  From that perspective, it does not matter
      *     whether we pass s->childp or &s->child.
-     *     (TODO: Right now, bdrv_replace_child_noperm() never modifies that
-     *     pointer anyway (though it will in the future), so at this point it
-     *     absolutely does not matter whether we pass s->childp or &s->child.)
      * (2) If new_bs is not NULL, s->childp will be NULL.  We then cannot use
      *     it here.
      * (3) If new_bs is NULL, *s->childp will have been NULLed by
      *     bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we
      *     must not pass a NULL *s->childp here.
-     *     (TODO: In its current state, bdrv_replace_child_noperm() will not
-     *     have NULLed *s->childp, so this does not apply yet.  It will in the
-     *     future.)
      *
      * So whether new_bs was NULL or not, we cannot pass s->childp here; and in
      * any case, there is no reason to pass it anyway.
      */
-    bdrv_replace_child_noperm(&s->child, s->old_bs);
+    bdrv_replace_child_noperm(&s->child, s->old_bs, true);
+    /*
+     * The child was pre-existing, so s->old_bs must be non-NULL, and
+     * s->child thus must not have been freed
+     */
+    assert(s->child != NULL);
+    if (!new_bs) {
+        /* As described above, *s->childp was cleared, so restore it */
+        assert(s->childp != NULL);
+        *s->childp = s->child;
+    }
     bdrv_unref(new_bs);
 }
 
@@ -2310,30 +2320,44 @@ static TransactionActionDrv bdrv_replace_child_drv = {
  *
  * The function doesn't update permissions, caller is responsible for this.
  *
+ * (*childp)->bs must not be NULL.
+ *
  * Note that if new_bs == NULL, @childp is stored in a state object attached
  * to @tran, so that the old child can be reinstated in the abort handler.
  * Therefore, if @new_bs can be NULL, @childp must stay valid until the
  * transaction is committed or aborted.
  *
- * (TODO: The reinstating does not happen yet, but it will once
- * bdrv_replace_child_noperm() NULLs *childp when new_bs is NULL.)
+ * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
+ * freed (on commit).  @free_empty_child should only be false if the
+ * caller will free the BDrvChild themselves (which may be important
+ * if this is in turn called in another transactional context).
  */
 static void bdrv_replace_child_tran(BdrvChild **childp,
                                     BlockDriverState *new_bs,
-                                    Transaction *tran)
+                                    Transaction *tran,
+                                    bool free_empty_child)
 {
     BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
     *s = (BdrvReplaceChildState) {
         .child = *childp,
         .childp = new_bs == NULL ? childp : NULL,
         .old_bs = (*childp)->bs,
+        .free_empty_child = free_empty_child,
     };
     tran_add(tran, &bdrv_replace_child_drv, s);
 
+    /* The abort handler relies on this */
+    assert(s->old_bs != NULL);
+
     if (new_bs) {
         bdrv_ref(new_bs);
     }
-    bdrv_replace_child_noperm(childp, new_bs);
+    /*
+     * Pass free_empty_child=false, we will free the child (if
+     * necessary) in bdrv_replace_child_commit() (if our
+     * @free_empty_child parameter was true).
+     */
+    bdrv_replace_child_noperm(childp, new_bs, false);
     /* old_bs reference is transparently moved from *childp to @s */
 }
 
@@ -2705,8 +2729,22 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
     return permissions[qapi_perm];
 }
 
+/**
+ * Replace (*childp)->bs by @new_bs.
+ *
+ * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents
+ * generally cannot handle a BdrvChild with .bs == NULL, so clearing
+ * BdrvChild.bs should generally immediately be followed by the
+ * BdrvChild pointer being cleared as well.
+ *
+ * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
+ * freed.  @free_empty_child should only be false if the caller will
+ * free the BdrvChild themselves (this may be important in a
+ * transactional context, where it may only be freed on commit).
+ */
 static void bdrv_replace_child_noperm(BdrvChild **childp,
-                                      BlockDriverState *new_bs)
+                                      BlockDriverState *new_bs,
+                                      bool free_empty_child)
 {
     BdrvChild *child = *childp;
     BlockDriverState *old_bs = child->bs;
@@ -2743,6 +2781,9 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
     }
 
     child->bs = new_bs;
+    if (!new_bs) {
+        *childp = NULL;
+    }
 
     if (new_bs) {
         QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
@@ -2772,6 +2813,10 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
         bdrv_parent_drained_end_single(child);
         drain_saldo++;
     }
+
+    if (free_empty_child && !child->bs) {
+        bdrv_child_free(child);
+    }
 }
 
 /**
@@ -2801,7 +2846,14 @@ static void bdrv_attach_child_common_abort(void *opaque)
     BdrvChild *child = *s->child;
     BlockDriverState *bs = child->bs;
 
-    bdrv_replace_child_noperm(s->child, NULL);
+    /*
+     * Pass free_empty_child=false, because we still need the child
+     * for the AioContext operations on the parent below; those
+     * BdrvChildClass methods all work on a BdrvChild object, so we
+     * need to keep it as an empty shell (after this function, it will
+     * not be attached to any parent, and it will not have a .bs).
+     */
+    bdrv_replace_child_noperm(s->child, NULL, false);
 
     if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
         bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort);
@@ -2823,7 +2875,6 @@ static void bdrv_attach_child_common_abort(void *opaque)
 
     bdrv_unref(bs);
     bdrv_child_free(child);
-    *s->child = NULL;
 }
 
 static TransactionActionDrv bdrv_attach_child_common_drv = {
@@ -2901,7 +2952,9 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs,
     }
 
     bdrv_ref(child_bs);
-    bdrv_replace_child_noperm(&new_child, child_bs);
+    bdrv_replace_child_noperm(&new_child, child_bs, true);
+    /* child_bs was non-NULL, so new_child must not have been freed */
+    assert(new_child != NULL);
 
     *child = new_child;
 
@@ -2960,8 +3013,7 @@ static void bdrv_detach_child(BdrvChild **childp)
 {
     BlockDriverState *old_bs = (*childp)->bs;
 
-    bdrv_replace_child_noperm(childp, NULL);
-    bdrv_child_free(*childp);
+    bdrv_replace_child_noperm(childp, NULL, true);
 
     if (old_bs) {
         /*
@@ -4951,7 +5003,11 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
     }
 
     if (child->bs) {
-        bdrv_replace_child_tran(childp, NULL, tran);
+        /*
+         * Pass free_empty_child=false, we will free the child in
+         * bdrv_remove_filter_or_cow_child_commit()
+         */
+        bdrv_replace_child_tran(childp, NULL, tran, false);
     }
 
     s = g_new(BdrvRemoveFilterOrCowChild, 1);
@@ -4961,8 +5017,6 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
         .is_backing = (childp == &bs->backing),
     };
     tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);
-
-    *childp = NULL;
 }
 
 /*
@@ -5005,7 +5059,7 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
          * Passing a pointer to the local variable @c is fine here, because
          * @to is not NULL, and so &c will not be attached to the transaction.
          */
-        bdrv_replace_child_tran(&c, to, tran);
+        bdrv_replace_child_tran(&c, to, tran, true);
     }
 
     return 0;
@@ -5161,7 +5215,9 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
     bdrv_drained_begin(old_bs);
     bdrv_drained_begin(new_bs);
 
-    bdrv_replace_child_tran(&child, new_bs, tran);
+    bdrv_replace_child_tran(&child, new_bs, tran, true);
+    /* @new_bs must have been non-NULL, so @child must not have been freed */
+    assert(child != NULL);
 
     found = g_hash_table_new(NULL, NULL);
     refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs);

From 16e29cc050516ac0915f0db9226079b17dcbcc51 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Mon, 15 Nov 2021 15:54:06 +0100
Subject: [PATCH 37/76] iotests/030: Unthrottle parallel jobs in reverse

See the comment for why this is necessary.

Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20211111120829.81329-11-hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-11-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 tests/qemu-iotests/030 | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 5fb65b4bef..567bf1da67 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -251,7 +251,16 @@ class TestParallelOps(iotests.QMPTestCase):
                                  speed=1024)
             self.assert_qmp(result, 'return', {})
 
-        for job in pending_jobs:
+        # Do this in reverse: After unthrottling them, some jobs may finish
+        # before we have unthrottled all of them.  This will drain their
+        # subgraph, and this will make jobs above them advance (despite those
+        # jobs on top being throttled).  In the worst case, all jobs below the
+        # top one are finished before we can unthrottle it, and this makes it
+        # advance so far that it completes before we can unthrottle it - which
+        # results in an error.
+        # Starting from the top (i.e. in reverse) does not have this problem:
+        # When a job finishes, the ones below it are not advanced.
+        for job in reversed(pending_jobs):
             result = self.vm.qmp('block-job-set-speed', device=job, speed=0)
             self.assert_qmp(result, 'return', {})
 

From 4d8b0f0a95361b1a3888f3eb3f76b27420383753 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Mon, 15 Nov 2021 15:54:07 +0100
Subject: [PATCH 38/76] docs: Deprecate incorrectly typed device_add arguments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While introducing a non-QemuOpts code path for device creation for JSON
-device, we noticed that QMP device_add doesn't check its input
correctly (accepting arguments that should have been rejected), and that
users may be relying on this behaviour (libvirt did until it was fixed
recently).

Let's use a deprecation period before we fix this bug in QEMU to avoid
nasty surprises for users.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211111143530.18985-1-kwolf@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-12-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 docs/about/deprecated.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 600031210d..c03fcf951f 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -250,6 +250,20 @@ options are removed in favor of using explicit ``blockdev-create`` and
 ``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for
 details.
 
+Incorrectly typed ``device_add`` arguments (since 6.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Due to shortcomings in the internal implementation of ``device_add``, QEMU
+incorrectly accepts certain invalid arguments: Any object or list arguments are
+silently ignored. Other argument types are not checked, but an implicit
+conversion happens, so that e.g. string values can be assigned to integer
+device properties or vice versa.
+
+This is a bug in QEMU that will be fixed in the future so that previously
+accepted incorrect commands will return an error. Users should make sure that
+all arguments passed to ``device_add`` are consistent with the documented
+property types.
+
 System accelerators
 -------------------
 

From c9d4e42a8febe4db22eed8463087b38c3c74fd4c Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Mon, 15 Nov 2021 15:54:09 +0100
Subject: [PATCH 39/76] softmmu/qdev-monitor: fix use-after-free in
 qdev_set_id()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported by Coverity (CID 1465222).

Fixes: 4a1d937796de0fecd8b22d7dbebf87f38e8282fd ("softmmu/qdev-monitor: add error handling in qdev_set_id")
Cc: Damien Hedde <damien.hedde@greensocs.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20211102163342.31162-1-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Damien Hedde <damien.hedde@greensocs.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-14-kwolf@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
 softmmu/qdev-monitor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index b5aaae4b8c..01ec420e61 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -593,8 +593,8 @@ const char *qdev_set_id(DeviceState *dev, char *id, Error **errp)
         if (prop) {
             dev->id = id;
         } else {
-            g_free(id);
             error_setg(errp, "Duplicate device ID '%s'", id);
+            g_free(id);
             return NULL;
         }
     } else {

From 5dbd0ce115c7720268e653fe928bb6a0c1314a80 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 16 Nov 2021 11:14:31 +0100
Subject: [PATCH 40/76] file-posix: Fix alignment after reopen changing
 O_DIRECT

At the end of a reopen, we already call bdrv_refresh_limits(), which
should update bs->request_alignment according to the new file
descriptor. However, raw_probe_alignment() relies on s->needs_alignment
and just uses 1 if it isn't set. We neglected to update this field, so
starting with cache=writeback and then reopening with cache=none means
that we get an incorrect bs->request_alignment == 1 and unaligned
requests fail instead of being automatically aligned.

Fix this by recalculating s->needs_alignment in raw_refresh_limits()
before calling raw_probe_alignment().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211104113109.56336-1-kwolf@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20211115145409.176785-13-kwolf@redhat.com>
[hreitz: Fix iotest 142 for block sizes greater than 512 by operating on
         a file with a size of 1 MB]
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20211116101431.105252-1-hreitz@redhat.com>
---
 block/file-posix.c         | 20 ++++++++++++++++----
 tests/qemu-iotests/142     | 29 +++++++++++++++++++++++++++++
 tests/qemu-iotests/142.out | 18 ++++++++++++++++++
 3 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 7a27c83060..b283093e5b 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -167,6 +167,7 @@ typedef struct BDRVRawState {
     int page_cache_inconsistent; /* errno from fdatasync failure */
     bool has_fallocate;
     bool needs_alignment;
+    bool force_alignment;
     bool drop_cache;
     bool check_cache_dropped;
     struct {
@@ -351,6 +352,17 @@ static bool dio_byte_aligned(int fd)
     return false;
 }
 
+static bool raw_needs_alignment(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) {
+        return true;
+    }
+
+    return s->force_alignment;
+}
+
 /* Check if read is allowed with given memory buffer and length.
  *
  * This function is used to check O_DIRECT memory buffer and request alignment.
@@ -728,9 +740,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
 
     s->has_discard = true;
     s->has_write_zeroes = true;
-    if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) {
-        s->needs_alignment = true;
-    }
 
     if (fstat(s->fd, &st) < 0) {
         ret = -errno;
@@ -784,9 +793,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
          * so QEMU makes sure all IO operations on the device are aligned
          * to sector size, or else FreeBSD will reject them with EINVAL.
          */
-        s->needs_alignment = true;
+        s->force_alignment = true;
     }
 #endif
+    s->needs_alignment = raw_needs_alignment(bs);
 
 #ifdef CONFIG_XFS
     if (platform_test_xfs_fd(s->fd)) {
@@ -1251,7 +1261,9 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
     BDRVRawState *s = bs->opaque;
     struct stat st;
 
+    s->needs_alignment = raw_needs_alignment(bs);
     raw_probe_alignment(bs, s->fd, errp);
+
     bs->bl.min_mem_alignment = s->buf_align;
     bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size);
 
diff --git a/tests/qemu-iotests/142 b/tests/qemu-iotests/142
index 69fd10ef51..86d65a2d1a 100755
--- a/tests/qemu-iotests/142
+++ b/tests/qemu-iotests/142
@@ -350,6 +350,35 @@ info block backing-file"
 
 echo "$hmp_cmds" | run_qemu -drive "$files","$ids" | grep "Cache"
 
+echo
+echo "--- Alignment after changing O_DIRECT ---"
+echo
+
+# Directly test the protocol level: Can unaligned requests succeed even if
+# O_DIRECT was only enabled through a reopen and vice versa?
+
+# Ensure image size is a multiple of the sector size (required for O_DIRECT)
+$QEMU_IMG create -f file "$TEST_IMG" 1M | _filter_img_create
+
+# And write some data (not strictly necessary, but it feels better to actually
+# have something to be read)
+$QEMU_IO -f file -c 'write 0 4096' "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IO --cache=writeback -f file $TEST_IMG <<EOF | _filter_qemu_io
+read 42 42
+reopen -o cache.direct=on
+read 42 42
+reopen -o cache.direct=off
+read 42 42
+EOF
+$QEMU_IO --cache=none -f file $TEST_IMG <<EOF | _filter_qemu_io
+read 42 42
+reopen -o cache.direct=off
+read 42 42
+reopen -o cache.direct=on
+read 42 42
+EOF
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/142.out b/tests/qemu-iotests/142.out
index a92b948edd..e20cfc8eb8 100644
--- a/tests/qemu-iotests/142.out
+++ b/tests/qemu-iotests/142.out
@@ -747,4 +747,22 @@ cache.no-flush=on on backing-file
     Cache mode:       writeback
     Cache mode:       writeback, direct
     Cache mode:       writeback, ignore flushes
+
+--- Alignment after changing O_DIRECT ---
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=file size=1048576
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 42/42 bytes at offset 42
+42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 42/42 bytes at offset 42
+42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 42/42 bytes at offset 42
+42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 42/42 bytes at offset 42
+42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 42/42 bytes at offset 42
+42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 42/42 bytes at offset 42
+42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done

From 76df2b8d695b07d85cce7db753a51cdbfe42b445 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Mon, 15 Nov 2021 16:39:43 -0600
Subject: [PATCH 41/76] nbd/server: Silence clang sanitizer warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

clang's sanitizer is picky: memset(NULL, x, 0) is technically
undefined behavior, even though no sane implementation of memset()
deferences the NULL.  Caught by the nbd-qemu-allocation iotest.

The alternative to checking before each memset is to instead force an
allocation of 1 element instead of g_new0(type, 0)'s behavior of
returning NULL for a 0-length array.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Fixes: 3b1f244c59 (nbd: Allow export of multiple bitmaps for one device)
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20211115223943.626416-1-eblake@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
---
 nbd/server.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 6d03e8a4b4..d9164ee6d0 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (C) 2016-2020 Red Hat, Inc.
+ *  Copyright (C) 2016-2021 Red Hat, Inc.
  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
  *
  *  Network Block Device Server Side
@@ -879,7 +879,9 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
     if (!*query) {
         if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
             meta->allocation_depth = meta->exp->allocation_depth;
-            memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+            if (meta->exp->nr_export_bitmaps) {
+                memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+            }
         }
         trace_nbd_negotiate_meta_query_parse("empty");
         return true;
@@ -894,7 +896,8 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
     if (nbd_strshift(&query, "dirty-bitmap:")) {
         trace_nbd_negotiate_meta_query_parse("dirty-bitmap:");
         if (!*query) {
-            if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+            if (client->opt == NBD_OPT_LIST_META_CONTEXT &&
+                meta->exp->nr_export_bitmaps) {
                 memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
             }
             trace_nbd_negotiate_meta_query_parse("empty");
@@ -1024,7 +1027,9 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
         /* enable all known contexts */
         meta->base_allocation = true;
         meta->allocation_depth = meta->exp->allocation_depth;
-        memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+        if (meta->exp->nr_export_bitmaps) {
+            memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps);
+        }
     } else {
         for (i = 0; i < nb_queries; ++i) {
             ret = nbd_negotiate_meta_query(client, meta, errp);

From 3d212b41e9ccb3f37d04f22c59a960bac099c1d4 Mon Sep 17 00:00:00 2001
From: "Richard W.M. Jones" <rjones@redhat.com>
Date: Mon, 15 Nov 2021 14:29:43 -0600
Subject: [PATCH 42/76] nbd/server: Add --selinux-label option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Under SELinux, Unix domain sockets have two labels.  One is on the
disk and can be set with commands such as chcon(1).  There is a
different label stored in memory (called the process label).  This can
only be set by the process creating the socket.  When using SELinux +
SVirt and wanting qemu to be able to connect to a qemu-nbd instance,
you must set both labels correctly first.

For qemu-nbd the options to set the second label are awkward.  You can
create the socket in a wrapper program and then exec into qemu-nbd.
Or you could try something with LD_PRELOAD.

This commit adds the ability to set the label straightforwardly on the
command line, via the new --selinux-label flag.  (The name of the flag
is the same as the equivalent nbdkit option.)

A worked example showing how to use the new option can be found in
this bug: https://bugzilla.redhat.com/show_bug.cgi?id=1984938

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1984938
Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>

[eblake: rebase to configure changes, reject --selinux-label if it is
not compiled in or not used on a Unix socket]
Note that we may relax some of these restrictions at a later date,
such as making it possible to label a TCP socket, although it may be
smarter to do so as a generic QMP action rather than more one-off
command lines in qemu-nbd.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20211115202944.615966-1-eblake@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
[eblake: adjust meson output as suggested by thuth]
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 meson.build                                   | 10 +++-
 meson_options.txt                             |  3 ++
 qemu-nbd.c                                    | 46 +++++++++++++++++++
 scripts/meson-buildoptions.sh                 |  3 ++
 tests/docker/dockerfiles/centos8.docker       |  1 +
 .../dockerfiles/fedora-i386-cross.docker      |  1 +
 tests/docker/dockerfiles/fedora.docker        |  1 +
 tests/docker/dockerfiles/opensuse-leap.docker |  1 +
 tests/docker/dockerfiles/ubuntu1804.docker    |  1 +
 tests/docker/dockerfiles/ubuntu2004.docker    |  1 +
 10 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 2ece4fe088..084806a941 100644
--- a/meson.build
+++ b/meson.build
@@ -1201,6 +1201,11 @@ keyutils = dependency('libkeyutils', required: false,
 
 has_gettid = cc.has_function('gettid')
 
+# libselinux
+selinux = dependency('libselinux',
+                     required: get_option('selinux'),
+                     method: 'pkg-config', kwargs: static_kwargs)
+
 # Malloc tests
 
 malloc = []
@@ -1479,6 +1484,7 @@ config_host_data.set('CONFIG_SPICE_PROTOCOL', spice_protocol.found())
 config_host_data.set('CONFIG_SPICE', spice.found())
 config_host_data.set('CONFIG_X11', x11.found())
 config_host_data.set('CONFIG_CFI', get_option('cfi'))
+config_host_data.set('CONFIG_SELINUX', selinux.found())
 config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version()))
 config_host_data.set('QEMU_VERSION_MAJOR', meson.project_version().split('.')[0])
 config_host_data.set('QEMU_VERSION_MINOR', meson.project_version().split('.')[1])
@@ -3054,7 +3060,8 @@ if have_tools
   qemu_io = executable('qemu-io', files('qemu-io.c'),
              dependencies: [block, qemuutil], install: true)
   qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
-               dependencies: [blockdev, qemuutil, gnutls], install: true)
+               dependencies: [blockdev, qemuutil, gnutls, selinux],
+               install: true)
 
   subdir('storage-daemon')
   subdir('contrib/rdmacm-mux')
@@ -3430,6 +3437,7 @@ summary_info += {'libdaxctl support': libdaxctl}
 summary_info += {'libudev':           libudev}
 # Dummy dependency, keep .found()
 summary_info += {'FUSE lseek':        fuse_lseek.found()}
+summary_info += {'selinux':           selinux}
 summary(summary_info, bool_yn: true, section: 'Dependencies')
 
 if not supported_cpus.contains(cpu)
diff --git a/meson_options.txt b/meson_options.txt
index 411952bc91..e392323732 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -201,3 +201,6 @@ option('slirp', type: 'combo', value: 'auto',
 option('fdt', type: 'combo', value: 'auto',
        choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],
        description: 'Whether and how to find the libfdt library')
+
+option('selinux', type: 'feature', value: 'auto',
+       description: 'SELinux support in qemu-nbd')
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 9d895ba24b..c6c20df68a 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -47,6 +47,10 @@
 #include "trace/control.h"
 #include "qemu-version.h"
 
+#ifdef CONFIG_SELINUX
+#include <selinux/selinux.h>
+#endif
+
 #ifdef __linux__
 #define HAVE_NBD_DEVICE 1
 #else
@@ -64,6 +68,7 @@
 #define QEMU_NBD_OPT_FORK          263
 #define QEMU_NBD_OPT_TLSAUTHZ      264
 #define QEMU_NBD_OPT_PID_FILE      265
+#define QEMU_NBD_OPT_SELINUX_LABEL 266
 
 #define MBR_SIZE 512
 
@@ -116,6 +121,9 @@ static void usage(const char *name)
 "  --fork                    fork off the server process and exit the parent\n"
 "                            once the server is running\n"
 "  --pid-file=PATH           store the server's process ID in the given file\n"
+#ifdef CONFIG_SELINUX
+"  --selinux-label=LABEL     set SELinux process label on listening socket\n"
+#endif
 #if HAVE_NBD_DEVICE
 "\n"
 "Kernel NBD client support:\n"
@@ -454,6 +462,7 @@ static const char *socket_activation_validate_opts(const char *device,
                                                    const char *sockpath,
                                                    const char *address,
                                                    const char *port,
+                                                   const char *selinux,
                                                    bool list)
 {
     if (device != NULL) {
@@ -472,6 +481,10 @@ static const char *socket_activation_validate_opts(const char *device,
         return "TCP port number can't be set when using socket activation";
     }
 
+    if (selinux != NULL) {
+        return "SELinux label can't be set when using socket activation";
+    }
+
     if (list) {
         return "List mode is incompatible with socket activation";
     }
@@ -534,6 +547,8 @@ int main(int argc, char **argv)
         { "trace", required_argument, NULL, 'T' },
         { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK },
         { "pid-file", required_argument, NULL, QEMU_NBD_OPT_PID_FILE },
+        { "selinux-label", required_argument, NULL,
+          QEMU_NBD_OPT_SELINUX_LABEL },
         { NULL, 0, NULL, 0 }
     };
     int ch;
@@ -560,6 +575,7 @@ int main(int argc, char **argv)
     int old_stderr = -1;
     unsigned socket_activation;
     const char *pid_file_name = NULL;
+    const char *selinux_label = NULL;
     BlockExportOptions *export_opts;
 
 #ifdef CONFIG_POSIX
@@ -749,6 +765,9 @@ int main(int argc, char **argv)
         case QEMU_NBD_OPT_PID_FILE:
             pid_file_name = optarg;
             break;
+        case QEMU_NBD_OPT_SELINUX_LABEL:
+            selinux_label = optarg;
+            break;
         }
     }
 
@@ -788,6 +807,7 @@ int main(int argc, char **argv)
         /* Using socket activation - check user didn't use -p etc. */
         const char *err_msg = socket_activation_validate_opts(device, sockpath,
                                                               bindto, port,
+                                                              selinux_label,
                                                               list);
         if (err_msg != NULL) {
             error_report("%s", err_msg);
@@ -827,6 +847,18 @@ int main(int argc, char **argv)
         }
     }
 
+    if (selinux_label) {
+#ifdef CONFIG_SELINUX
+        if (sockpath == NULL && device == NULL) {
+            error_report("--selinux-label is not permitted without --socket");
+            exit(EXIT_FAILURE);
+        }
+#else
+        error_report("SELinux support not enabled in this binary");
+        exit(EXIT_FAILURE);
+#endif
+    }
+
     if (list) {
         saddr = nbd_build_socket_address(sockpath, bindto, port);
         return qemu_nbd_client_list(saddr, tlscreds, bindto);
@@ -940,6 +972,13 @@ int main(int argc, char **argv)
         } else {
             backlog = MIN(shared, SOMAXCONN);
         }
+#ifdef CONFIG_SELINUX
+        if (selinux_label && setsockcreatecon_raw(selinux_label) == -1) {
+            error_report("Cannot set SELinux socket create context to %s: %s",
+                         selinux_label, strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+#endif
         saddr = nbd_build_socket_address(sockpath, bindto, port);
         if (qio_net_listener_open_sync(server, saddr, backlog,
                                        &local_err) < 0) {
@@ -947,6 +986,13 @@ int main(int argc, char **argv)
             error_report_err(local_err);
             exit(EXIT_FAILURE);
         }
+#ifdef CONFIG_SELINUX
+        if (selinux_label && setsockcreatecon_raw(NULL) == -1) {
+            error_report("Cannot clear SELinux socket create context: %s",
+                         strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+#endif
     } else {
         size_t i;
         /* See comment in check_socket_activation above. */
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 45e1f2e20d..7a17ff4218 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -72,6 +72,7 @@ meson_options_help() {
   printf "%s\n" '  sdl             SDL user interface'
   printf "%s\n" '  sdl-image       SDL Image support for icons'
   printf "%s\n" '  seccomp         seccomp support'
+  printf "%s\n" '  selinux         SELinux support in qemu-nbd'
   printf "%s\n" '  smartcard       CA smartcard emulation support'
   printf "%s\n" '  snappy          snappy compression support'
   printf "%s\n" '  sparse          sparse checker'
@@ -215,6 +216,8 @@ _meson_option_parse() {
     --disable-sdl-image) printf "%s" -Dsdl_image=disabled ;;
     --enable-seccomp) printf "%s" -Dseccomp=enabled ;;
     --disable-seccomp) printf "%s" -Dseccomp=disabled ;;
+    --enable-selinux) printf "%s" -Dselinux=enabled ;;
+    --disable-selinux) printf "%s" -Dselinux=disabled ;;
     --enable-slirp) printf "%s" -Dslirp=enabled ;;
     --disable-slirp) printf "%s" -Dslirp=disabled ;;
     --enable-slirp=*) quote_sh "-Dslirp=$2" ;;
diff --git a/tests/docker/dockerfiles/centos8.docker b/tests/docker/dockerfiles/centos8.docker
index 46398c61ee..7f135f8e8c 100644
--- a/tests/docker/dockerfiles/centos8.docker
+++ b/tests/docker/dockerfiles/centos8.docker
@@ -51,6 +51,7 @@ ENV PACKAGES \
     libpng-devel \
     librbd-devel \
     libseccomp-devel \
+    libselinux-devel \
     libslirp-devel \
     libssh-devel \
     libtasn1-devel \
diff --git a/tests/docker/dockerfiles/fedora-i386-cross.docker b/tests/docker/dockerfiles/fedora-i386-cross.docker
index f62a71ce22..13328e6081 100644
--- a/tests/docker/dockerfiles/fedora-i386-cross.docker
+++ b/tests/docker/dockerfiles/fedora-i386-cross.docker
@@ -8,6 +8,7 @@ ENV PACKAGES \
     gcc \
     git \
     libffi-devel.i686 \
+    libselinux-devel.i686 \
     libtasn1-devel.i686 \
     libzstd-devel.i686 \
     make \
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
index eec1add7f6..c6fd7e1113 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -53,6 +53,7 @@ ENV PACKAGES \
     libpng-devel \
     librbd-devel \
     libseccomp-devel \
+    libselinux-devel \
     libslirp-devel \
     libssh-devel \
     libtasn1-devel \
diff --git a/tests/docker/dockerfiles/opensuse-leap.docker b/tests/docker/dockerfiles/opensuse-leap.docker
index 5a8bee0289..3bbdb67f4f 100644
--- a/tests/docker/dockerfiles/opensuse-leap.docker
+++ b/tests/docker/dockerfiles/opensuse-leap.docker
@@ -55,6 +55,7 @@ ENV PACKAGES \
     libpulse-devel \
     librbd-devel \
     libseccomp-devel \
+    libselinux-devel \
     libspice-server-devel \
     libssh-devel \
     libtasn1-devel \
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
index 0880bf3e29..450fd06d0d 100644
--- a/tests/docker/dockerfiles/ubuntu1804.docker
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
@@ -60,6 +60,7 @@ ENV PACKAGES \
     libsdl2-dev \
     libsdl2-image-dev \
     libseccomp-dev \
+    libselinux-dev \
     libsnappy-dev \
     libspice-protocol-dev \
     libspice-server-dev \
diff --git a/tests/docker/dockerfiles/ubuntu2004.docker b/tests/docker/dockerfiles/ubuntu2004.docker
index 39de63d012..15a026be09 100644
--- a/tests/docker/dockerfiles/ubuntu2004.docker
+++ b/tests/docker/dockerfiles/ubuntu2004.docker
@@ -60,6 +60,7 @@ ENV PACKAGES \
     libsdl2-dev \
     libsdl2-image-dev \
     libseccomp-dev \
+    libselinux-dev \
     libslirp-dev \
     libsnappy-dev \
     libspice-protocol-dev \

From d47e3751b5b642b9a3d9604f48717a51f1cb0c01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Mon, 15 Nov 2021 14:29:10 +0000
Subject: [PATCH 43/76] tests/docker: force NOUSER=1 for base images
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As base images are often used to build further images like toolchains
ensure we don't add the local user by accident. The local user should
only exist on local images and not anything that gets pushed up to the
public registry.

Reported-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20211115142915.3797652-2-alex.bennee@linaro.org>
---
 tests/docker/Makefile.include | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
index 7a63a3b7f7..f1a0c5db7a 100644
--- a/tests/docker/Makefile.include
+++ b/tests/docker/Makefile.include
@@ -150,6 +150,9 @@ docker-image-debian-sparc64-cross: docker-image-debian10
 # The native build should never use the registry
 docker-image-debian-native: DOCKER_REGISTRY=
 
+# base images should not add a local user
+docker-image-debian10: NOUSER=1
+docker-image-debian11: NOUSER=1
 
 #
 # The build rule for hexagon-cross is special in so far for most of

From 81c9b06ea0bf7d5e0c0b00aae41df9060c063cfa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Mon, 15 Nov 2021 14:29:11 +0000
Subject: [PATCH 44/76] tests/vm: sort the special variable list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Making the list alphabetical makes it easier to find the config option
you are looking for.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Willian Rampazzo <willianr@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211115142915.3797652-3-alex.bennee@linaro.org>
---
 tests/vm/Makefile.include | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include
index f3a3a1c751..f8ca619cf2 100644
--- a/tests/vm/Makefile.include
+++ b/tests/vm/Makefile.include
@@ -52,21 +52,21 @@ endif
 	@echo
 	@echo "Special variables:"
 	@echo "    BUILD_TARGET=foo		 - Override the build target"
-	@echo "    TARGET_LIST=a,b,c    	 - Override target list in builds"
-	@echo '    EXTRA_CONFIGURE_OPTS="..."'
-	@echo "    J=[0..9]*            	 - Override the -jN parameter for make commands"
 	@echo "    DEBUG=1              	 - Enable verbose output on host and interactive debugging"
+	@echo '    EXTRA_CONFIGURE_OPTS="..."   - Pass to configure step'
+	@echo "    J=[0..9]*            	 - Override the -jN parameter for make commands"
 	@echo "    LOG_CONSOLE=1        	 - Log console to file in: ~/.cache/qemu-vm "
-	@echo "    V=1				 - Enable verbose ouput on host and guest commands"
-	@echo "    QEMU_LOCAL=1                 - Use QEMU binary local to this build."
 	@echo "    QEMU=/path/to/qemu		 - Change path to QEMU binary"
-	@echo "    QEMU_IMG=/path/to/qemu-img	 - Change path to qemu-img tool"
 ifeq ($(HAVE_PYTHON_YAML),yes)
 	@echo "    QEMU_CONFIG=/path/conf.yml   - Change path to VM configuration .yml file."
 else
 	@echo "    (install python3-yaml to enable support for yaml file to configure a VM.)"
 endif
 	@echo "                                   See conf_example_*.yml for file format details."
+	@echo "    QEMU_IMG=/path/to/qemu-img	 - Change path to qemu-img tool"
+	@echo "    QEMU_LOCAL=1                 - Use QEMU binary local to this build."
+	@echo "    TARGET_LIST=a,b,c    	 - Override target list in builds"
+	@echo "    V=1				 - Enable verbose ouput on host and guest commands"
 
 vm-build-all: $(addprefix vm-build-, $(IMAGES))
 

From ebd654aabc1a9128b2ea390faa23c2fb6b4b13b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Mon, 15 Nov 2021 14:29:12 +0000
Subject: [PATCH 45/76] tests/vm: don't build using TCG by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While it is useful to run these images using TCG their performance
will not be anything like the native guests. Don't do it by default.

Fixes: https://gitlab.com/qemu-project/qemu/-/issues/393
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Willian Rampazzo <willianr@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211115142915.3797652-4-alex.bennee@linaro.org>
---
 tests/vm/Makefile.include | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include
index f8ca619cf2..ae91f5043e 100644
--- a/tests/vm/Makefile.include
+++ b/tests/vm/Makefile.include
@@ -2,16 +2,24 @@
 
 .PHONY: vm-build-all vm-clean-all
 
+HOST_ARCH = $(if $(ARCH),$(ARCH),$(shell uname -m))
+
 EFI_AARCH64 = $(wildcard $(BUILD_DIR)/pc-bios/edk2-aarch64-code.fd)
 
-IMAGES := freebsd netbsd openbsd centos fedora haiku.x86_64
+X86_IMAGES := freebsd netbsd openbsd centos fedora haiku.x86_64
 ifneq ($(GENISOIMAGE),)
-IMAGES += ubuntu.i386 centos
+X86_IMAGES += ubuntu.i386 centos
 ifneq ($(EFI_AARCH64),)
-IMAGES += ubuntu.aarch64 centos.aarch64
+ARM64_IMAGES += ubuntu.aarch64 centos.aarch64
 endif
 endif
 
+ifeq ($(HOST_ARCH),x86_64)
+IMAGES=$(X86_IMAGES) $(if $(USE_TCG),$(ARM64_IMAGES))
+else ifeq ($(HOST_ARCH),aarch64)
+IMAGES=$(ARM64_IMAGES) $(if $(USE_TCG),$(X86_IMAGES))
+endif
+
 IMAGES_DIR := $(HOME)/.cache/qemu-vm/images
 IMAGE_FILES := $(patsubst %, $(IMAGES_DIR)/%.img, $(IMAGES))
 
@@ -43,7 +51,7 @@ else
 endif
 	@echo "  vm-build-haiku.x86_64           - Build QEMU in Haiku VM"
 	@echo ""
-	@echo "  vm-build-all                    - Build QEMU in all VMs"
+	@echo "  vm-build-all                    - Build QEMU in: $(IMAGES)"
 	@echo "  vm-clean-all                    - Clean up VM images"
 	@echo
 	@echo "For trouble-shooting:"
@@ -56,6 +64,7 @@ endif
 	@echo '    EXTRA_CONFIGURE_OPTS="..."   - Pass to configure step'
 	@echo "    J=[0..9]*            	 - Override the -jN parameter for make commands"
 	@echo "    LOG_CONSOLE=1        	 - Log console to file in: ~/.cache/qemu-vm "
+	@echo "    USE_TCG=1        	         - Use TCG for cross-arch images"
 	@echo "    QEMU=/path/to/qemu		 - Change path to QEMU binary"
 ifeq ($(HAVE_PYTHON_YAML),yes)
 	@echo "    QEMU_CONFIG=/path/conf.yml   - Change path to VM configuration .yml file."

From a399f9143e801efbc574495c7df0eba3bd729e9f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 15 Nov 2021 14:29:13 +0000
Subject: [PATCH 46/76] meson: remove useless libdl test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

dlopen is never used after it is sought via cc.find_library, because
plugins use gmodule instead; remove the test.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20211110092454.30916-1-pbonzini@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211115142915.3797652-5-alex.bennee@linaro.org>
---
 accel/tcg/meson.build | 2 +-
 meson.build           | 8 +-------
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index 137a1a44cc..7a0a79d731 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -10,7 +10,7 @@ tcg_ss.add(files(
 ))
 tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
 tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
-tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl])
+tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')])
 specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
 
 specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
diff --git a/meson.build b/meson.build
index 2ece4fe088..baeaee4522 100644
--- a/meson.build
+++ b/meson.build
@@ -566,13 +566,7 @@ endif
 spice_headers = spice.partial_dependency(compile_args: true, includes: true)
 
 rt = cc.find_library('rt', required: false)
-libdl = not_found
-if 'CONFIG_PLUGIN' in config_host
-  libdl = cc.find_library('dl', required: false)
-  if not cc.has_function('dlopen', dependencies: libdl)
-    error('dlopen not found')
-  endif
-endif
+
 libiscsi = not_found
 if not get_option('libiscsi').auto() or have_block
   libiscsi = dependency('libiscsi', version: '>=1.9.0',

From d7c2e2b3f447133175be5817663147d1309bde2d Mon Sep 17 00:00:00 2001
From: Cleber Rosa <crosa@redhat.com>
Date: Mon, 15 Nov 2021 14:29:14 +0000
Subject: [PATCH 47/76] Jobs based on custom runners: add CentOS Stream 8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This introduces three different parts of a job designed to run
on a custom runner managed by Red Hat.  The goals include:

  a) propose a model for other organizations that want to onboard
     their own runners, with their specific platforms, build
     configuration and tests.

  b) bring awareness to the differences between upstream QEMU and the
     version available under CentOS Stream, which is "A preview of
     upcoming Red Hat Enterprise Linux minor and major releases".

  c) because of b), it should be easier to identify and reduce the gap
     between Red Hat's downstream and upstream QEMU.

The components of this custom job are:

  I) OS build environment setup code:

     - additions to the existing "build-environment.yml" playbook
       that can be used to set up CentOS/EL 8 systems.

     - a CentOS Stream 8 specific "build-environment.yml" playbook
       that adds to the generic one.

 II) QEMU build configuration: a script that will produce binaries with
     features as similar as possible to the ones built and packaged on
     CentOS stream 8.

III) Scripts that define the minimum amount of testing that the
     binaries built with the given configuration (point II) under the
     given OS build environment (point I) should be subjected to.

 IV) Job definition: GitLab CI jobs that will dispatch the build/test
     jobs (see points #II and #III) to the machine specifically
     configured according to #I.

Signed-off-by: Cleber Rosa <crosa@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Willian Rampazzo <willianr@redhat.com>
Tested-by: Willian Rampazzo <willianr@redhat.com>
Message-Id: <20211111160501.862396-2-crosa@redhat.com>
Message-Id: <20211115142915.3797652-6-alex.bennee@linaro.org>
---
 .gitlab-ci.d/custom-runners.yml               |  29 +++
 docs/devel/ci-jobs.rst.inc                    |   7 +
 .../org.centos/stream/8/build-environment.yml |  51 +++++
 .../ci/org.centos/stream/8/x86_64/configure   | 208 ++++++++++++++++++
 .../org.centos/stream/8/x86_64/test-avocado   |  70 ++++++
 scripts/ci/org.centos/stream/README           |  17 ++
 scripts/ci/setup/build-environment.yml        |  38 ++++
 7 files changed, 420 insertions(+)
 create mode 100644 scripts/ci/org.centos/stream/8/build-environment.yml
 create mode 100755 scripts/ci/org.centos/stream/8/x86_64/configure
 create mode 100755 scripts/ci/org.centos/stream/8/x86_64/test-avocado
 create mode 100644 scripts/ci/org.centos/stream/README

diff --git a/.gitlab-ci.d/custom-runners.yml b/.gitlab-ci.d/custom-runners.yml
index a89a20da48..1f56297dfa 100644
--- a/.gitlab-ci.d/custom-runners.yml
+++ b/.gitlab-ci.d/custom-runners.yml
@@ -248,3 +248,32 @@ ubuntu-20.04-aarch64-notcg:
  - ../configure --disable-libssh --disable-tcg
  - make --output-sync -j`nproc`
  - make --output-sync -j`nproc` check V=1
+
+centos-stream-8-x86_64:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - centos_stream_8
+ - x86_64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE"
+ artifacts:
+   name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
+   when: on_failure
+   expire_in: 7 days
+   paths:
+     - build/tests/results/latest/results.xml
+     - build/tests/results/latest/test-results
+   reports:
+     junit: build/tests/results/latest/results.xml
+ before_script:
+ - JOBS=$(expr $(nproc) + 1)
+ script:
+ - mkdir build
+ - cd build
+ - ../scripts/ci/org.centos/stream/8/x86_64/configure
+ - make -j"$JOBS"
+ - make NINJA=":" check
+ - ../scripts/ci/org.centos/stream/8/x86_64/test-avocado
diff --git a/docs/devel/ci-jobs.rst.inc b/docs/devel/ci-jobs.rst.inc
index 277975e4ad..db3f571d5f 100644
--- a/docs/devel/ci-jobs.rst.inc
+++ b/docs/devel/ci-jobs.rst.inc
@@ -49,3 +49,10 @@ S390X_RUNNER_AVAILABLE
 If you've got access to an IBM Z host that can be used as a gitlab-CI
 runner, you can set this variable to enable the tests that require this
 kind of host. The runner should be tagged with "s390x".
+
+CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If you've got access to a CentOS Stream 8 x86_64 host that can be
+used as a gitlab-CI runner, you can set this variable to enable the
+tests that require this kind of host. The runner should be tagged with
+both "centos_stream_8" and "x86_64".
diff --git a/scripts/ci/org.centos/stream/8/build-environment.yml b/scripts/ci/org.centos/stream/8/build-environment.yml
new file mode 100644
index 0000000000..42b0471634
--- /dev/null
+++ b/scripts/ci/org.centos/stream/8/build-environment.yml
@@ -0,0 +1,51 @@
+---
+- name: Installation of extra packages to build QEMU
+  hosts: all
+  tasks:
+    - name: Extra check for CentOS Stream 8
+      lineinfile:
+        path: /etc/redhat-release
+        line: CentOS Stream release 8
+        state: present
+      check_mode: yes
+      register: centos_stream_8
+
+    - name: Enable PowerTools repo on CentOS Stream 8
+      ini_file:
+        path: /etc/yum.repos.d/CentOS-Stream-PowerTools.repo
+        section: powertools
+        option: enabled
+        value: "1"
+      when:
+        - ansible_facts['distribution'] == 'CentOS'
+        - ansible_facts['distribution_major_version'] == '8'
+        - centos_stream_8
+
+    - name: Install basic packages to build QEMU on CentOS Stream 8
+      dnf:
+        name:
+          - device-mapper-multipath-devel
+          - glusterfs-api-devel
+          - gnutls-devel
+          - libcap-ng-devel
+          - libcurl-devel
+          - libfdt-devel
+          - libiscsi-devel
+          - libpmem-devel
+          - librados-devel
+          - librbd-devel
+          - libseccomp-devel
+          - libssh-devel
+          - libxkbcommon-devel
+          - ninja-build
+          - numactl-devel
+          - python3-sphinx
+          - redhat-rpm-config
+          - snappy-devel
+          - spice-server-devel
+          - systemd-devel
+        state: present
+      when:
+        - ansible_facts['distribution'] == 'CentOS'
+        - ansible_facts['distribution_major_version'] == '8'
+        - centos_stream_8
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
new file mode 100755
index 0000000000..048e80dc49
--- /dev/null
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -0,0 +1,208 @@
+#!/bin/sh -e
+#
+# Configuration for QEMU based on CentOS Stream 8 x86_64 builds
+#
+# The "configure" command line is based on:
+#
+# https://git.centos.org/rpms/qemu-kvm/blob/c8s-stream-rhel/f/SPECS/qemu-kvm.spec
+#
+# But, because the SPEC file contains a number of conditionals and
+# variable and expansions only available at RPM build time, this version
+# was initially generated from an actual RPM build on an x86_64 platform.
+#
+# From that initial version, options that are required or are a
+# consequence of non-upstream patches have been adapted.  One example
+# is "--without-default-devices" which is *not* present here, given
+# that patches adding downstream specific devices are not available.
+#
+../configure \
+--prefix="/usr" \
+--libdir="/usr/lib64" \
+--datadir="/usr/share" \
+--sysconfdir="/etc" \
+--interp-prefix=/usr/qemu-%M \
+--localstatedir="/var" \
+--docdir="/usr/share/doc" \
+--libexecdir="/usr/libexec" \
+--extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \
+--extra-cflags="-O2 -g -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions -fstack-protector-strong -grecord-gcc-switches -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection" \
+--with-suffix="qemu-kvm" \
+--firmwarepath=/usr/share/qemu-firmware \
+--with-git=meson \
+--with-git-submodules=update \
+--target-list="x86_64-softmmu" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--audio-drv-list="" \
+--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
+--with-coroutine=ucontext \
+--with-git=git \
+--tls-priority=@QEMU,SYSTEM \
+--disable-attr \
+--disable-auth-pam \
+--disable-avx2 \
+--disable-avx512f \
+--disable-bochs \
+--disable-bpf \
+--disable-brlapi \
+--disable-bsd-user \
+--disable-bzip2 \
+--disable-cap-ng \
+--disable-capstone \
+--disable-cfi \
+--disable-cfi-debug \
+--disable-cloop \
+--disable-cocoa \
+--disable-coroutine-pool \
+--disable-crypto-afalg \
+--disable-curl \
+--disable-curses \
+--disable-debug-info \
+--disable-debug-mutex \
+--disable-debug-tcg \
+--disable-dmg \
+--disable-docs \
+--disable-fuse \
+--disable-fuse-lseek \
+--disable-gcrypt \
+--disable-gio \
+--disable-glusterfs \
+--disable-gnutls \
+--disable-gtk \
+--disable-guest-agent \
+--disable-guest-agent-msi \
+--disable-hax \
+--disable-hvf \
+--disable-iconv \
+--disable-kvm \
+--disable-libdaxctl \
+--disable-libiscsi \
+--disable-libnfs \
+--disable-libpmem \
+--disable-libssh \
+--disable-libudev \
+--disable-libusb \
+--disable-libxml2 \
+--disable-linux-aio \
+--disable-linux-io-uring \
+--disable-linux-user \
+--disable-live-block-migration \
+--disable-lto \
+--disable-lzfse \
+--disable-lzo \
+--disable-malloc-trim \
+--disable-membarrier \
+--disable-modules \
+--disable-module-upgrades \
+--disable-mpath \
+--disable-multiprocess \
+--disable-netmap \
+--disable-nettle \
+--disable-numa \
+--disable-nvmm \
+--disable-opengl \
+--disable-parallels \
+--disable-pie \
+--disable-pvrdma \
+--disable-qcow1 \
+--disable-qed \
+--disable-qom-cast-debug \
+--disable-rbd \
+--disable-rdma \
+--disable-replication \
+--disable-rng-none \
+--disable-safe-stack \
+--disable-sanitizers \
+--disable-sdl \
+--disable-sdl-image \
+--disable-seccomp \
+--disable-slirp-smbd \
+--disable-smartcard \
+--disable-snappy \
+--disable-sparse \
+--disable-spice \
+--disable-strip \
+--disable-system \
+--disable-tcg \
+--disable-tools \
+--disable-tpm \
+--disable-u2f \
+--disable-usb-redir \
+--disable-user \
+--disable-vde \
+--disable-vdi \
+--disable-vhost-crypto \
+--disable-vhost-kernel \
+--disable-vhost-net \
+--disable-vhost-scsi \
+--disable-vhost-user \
+--disable-vhost-user-blk-server \
+--disable-vhost-vdpa \
+--disable-vhost-vsock \
+--disable-virglrenderer \
+--disable-virtfs \
+--disable-virtiofsd \
+--disable-vnc \
+--disable-vnc-jpeg \
+--disable-vnc-png \
+--disable-vnc-sasl \
+--disable-vte \
+--disable-vvfat \
+--disable-werror \
+--disable-whpx \
+--disable-xen \
+--disable-xen-pci-passthrough \
+--disable-xfsctl \
+--disable-xkbcommon \
+--disable-zstd \
+--enable-attr \
+--enable-avx2 \
+--enable-cap-ng \
+--enable-capstone \
+--enable-coroutine-pool \
+--enable-curl \
+--enable-debug-info \
+--enable-docs \
+--enable-fdt \
+--enable-gcrypt \
+--enable-glusterfs \
+--enable-gnutls \
+--enable-guest-agent \
+--enable-iconv \
+--enable-kvm \
+--enable-libiscsi \
+--enable-libpmem \
+--enable-libssh \
+--enable-libusb \
+--enable-libudev \
+--enable-linux-aio \
+--enable-lzo \
+--enable-malloc-trim \
+--enable-modules \
+--enable-mpath \
+--enable-numa \
+--enable-opengl \
+--enable-pie \
+--enable-rbd \
+--enable-rdma \
+--enable-seccomp \
+--enable-snappy \
+--enable-smartcard \
+--enable-spice \
+--enable-system \
+--enable-tcg \
+--enable-tools \
+--enable-tpm \
+--enable-trace-backend=dtrace \
+--enable-usb-redir \
+--enable-virtiofsd \
+--enable-vhost-kernel \
+--enable-vhost-net \
+--enable-vhost-user \
+--enable-vhost-user-blk-server \
+--enable-vhost-vdpa \
+--enable-vhost-vsock \
+--enable-vnc \
+--enable-vnc-png \
+--enable-vnc-sasl \
+--enable-werror \
+--enable-xkbcommon
diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado
new file mode 100755
index 0000000000..7aeecbcfb8
--- /dev/null
+++ b/scripts/ci/org.centos/stream/8/x86_64/test-avocado
@@ -0,0 +1,70 @@
+#!/bin/sh -e
+#
+# Runs a previously vetted list of tests, either marked explicitly for
+# KVM and x86_64, or tests that are generic enough to be valid for all
+# targets. Such a test list can be generated with:
+#
+# ./tests/venv/bin/avocado list --filter-by-tags-include-empty \
+#   --filter-by-tags-include-empty-key -t accel:kvm,arch:x86_64 \
+#   tests/avocado/
+#
+# This is almost the complete list of avocado based tests available at
+# the time this was compile, with the following exceptions:
+#
+# * Require machine type "x-remote":
+#   - tests/avocado/multiprocess.py:Multiprocess.test_multiprocess_x86_64
+#
+# * Needs superuser privileges:
+#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_virtiofsd_set_up
+#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_launch_set_up
+#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_launch_set_up
+#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_mount_set_up
+#   - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_two_runs
+#
+# * Requires display type "egl-headless":
+#   - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_virtio_vga_virgl
+#   - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_vhost_user_vga_virgl
+#
+#  * Test is marked (unconditionally) to be skipped:
+#   - tests/avocado/virtio_check_params.py:VirtioMaxSegSettingsCheck.test_machine_types
+#
+make get-vm-images
+./tests/venv/bin/avocado run \
+    --job-results-dir=tests/results/ \
+    tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_i440fx_kvm \
+    tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_q35_kvm \
+    tests/avocado/boot_linux_console.py:BootLinuxConsole.test_x86_64_pc \
+    tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \
+    tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \
+    tests/avocado/hotplug_cpu.py:HotPlugCPU.test \
+    tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_pt \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict \
+    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict_cm \
+    tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_exit_error_msg_with_linux_v3_6 \
+    tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_work_with_linux_v4_16 \
+    tests/avocado/migration.py:Migration.test_migration_with_exec \
+    tests/avocado/migration.py:Migration.test_migration_with_tcp_localhost \
+    tests/avocado/migration.py:Migration.test_migration_with_unix \
+    tests/avocado/pc_cpu_hotplug_props.py:OmittedCPUProps.test_no_die_id \
+    tests/avocado/replay_kernel.py:ReplayKernelNormal.test_x86_64_pc \
+    tests/avocado/reverse_debugging.py:ReverseDebugging_X86_64.test_x86_64_pc \
+    tests/avocado/version.py:Version.test_qmp_human_info_version \
+    tests/avocado/virtio_version.py:VirtioVersionCheck.test_conventional_devs \
+    tests/avocado/virtio_version.py:VirtioVersionCheck.test_modern_only_devs \
+    tests/avocado/vnc.py:Vnc.test_change_password \
+    tests/avocado/vnc.py:Vnc.test_change_password_requires_a_password \
+    tests/avocado/vnc.py:Vnc.test_no_vnc \
+    tests/avocado/vnc.py:Vnc.test_no_vnc_change_password \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_1 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_set_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_unset_4_1 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_set_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_4_0 \
+    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_unset_4_1 \
+    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_0_alias_compatibility \
+    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_1_alias \
+    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_none_alias
diff --git a/scripts/ci/org.centos/stream/README b/scripts/ci/org.centos/stream/README
new file mode 100644
index 0000000000..e3eadfe3ea
--- /dev/null
+++ b/scripts/ci/org.centos/stream/README
@@ -0,0 +1,17 @@
+This directory contains scripts for generating a build of QEMU that
+closely matches the CentOS Stream[1] builds of the qemu-kvm package.
+
+To have the environment ready to configure, build QEMU and run tests,
+please start with a CentOS Stream machine and:
+
+ * apply the generic "build-environment.yml" playbook located at
+   scripts/ci/setup
+
+ * apply the "build-environment.yml" in the directory following the
+   CentOS Stream version (such as "8").
+
+This currently only covers CentOS Stream 8 environments and
+packages[2].
+
+[1] https://www.centos.org/centos-stream/
+[2] https://git.centos.org/rpms/qemu-kvm/commits/c8s-stream-rhel
diff --git a/scripts/ci/setup/build-environment.yml b/scripts/ci/setup/build-environment.yml
index 581c1c75d1..599896cc5b 100644
--- a/scripts/ci/setup/build-environment.yml
+++ b/scripts/ci/setup/build-environment.yml
@@ -114,3 +114,41 @@
       when:
         - ansible_facts['distribution'] == 'Ubuntu'
         - ansible_facts['distribution_version'] == '20.04'
+
+    - name: Install basic packages to build QEMU on EL8
+      dnf:
+        # This list of packages start with tests/docker/dockerfiles/centos8.docker
+        # but only include files that are common to all distro variants and present
+        # in the standard repos (no add-ons)
+        name:
+          - bzip2
+          - bzip2-devel
+          - dbus-daemon
+          - diffutils
+          - gcc
+          - gcc-c++
+          - genisoimage
+          - gettext
+          - git
+          - glib2-devel
+          - libaio-devel
+          - libepoxy-devel
+          - libgcrypt-devel
+          - lzo-devel
+          - make
+          - mesa-libEGL-devel
+          - nettle-devel
+          - nmap-ncat
+          - perl-Test-Harness
+          - pixman-devel
+          - python36
+          - rdma-core-devel
+          - spice-glib-devel
+          - spice-server
+          - systemtap-sdt-devel
+          - tar
+          - zlib-devel
+        state: present
+      when:
+        - ansible_facts['distribution_file_variety'] == 'RedHat'
+        - ansible_facts['distribution_version'] == '8'

From 60bec83e8a5c7cbaa47648f4b3370c8d53e48982 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Mon, 15 Nov 2021 14:29:15 +0000
Subject: [PATCH 48/76] gitlab-ci: Split custom-runners.yml in one file per
 runner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To ease maintenance, add the custom-runners/ directory and
split custom-runners.yml in 3 files, all included by the
current custom-runners.yml:
 - ubuntu-18.04-s390x.yml
 - ubuntu-20.04-aarch64.yml
 - centos-stream-8-x86_64.yml

Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20211115095608.2436223-1-philmd@redhat.com>
Reviewed-by: Willian Rampazzo <willianr@redhat.com>
Message-Id: <20211115142915.3797652-7-alex.bennee@linaro.org>
---
 .gitlab-ci.d/custom-runners.yml               | 268 +-----------------
 .../custom-runners/centos-stream-8-x86_64.yml |  28 ++
 .../custom-runners/ubuntu-18.04-s390x.yml     | 118 ++++++++
 .../custom-runners/ubuntu-20.04-aarch64.yml   | 118 ++++++++
 4 files changed, 268 insertions(+), 264 deletions(-)
 create mode 100644 .gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml
 create mode 100644 .gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml
 create mode 100644 .gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml

diff --git a/.gitlab-ci.d/custom-runners.yml b/.gitlab-ci.d/custom-runners.yml
index 1f56297dfa..056c374619 100644
--- a/.gitlab-ci.d/custom-runners.yml
+++ b/.gitlab-ci.d/custom-runners.yml
@@ -13,267 +13,7 @@
 variables:
   GIT_STRATEGY: clone
 
-# All ubuntu-18.04 jobs should run successfully in an environment
-# setup by the scripts/ci/setup/build-environment.yml task
-# "Install basic packages to build QEMU on Ubuntu 18.04/20.04"
-ubuntu-18.04-s390x-all-linux-static:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$S390X_RUNNER_AVAILABLE"
- script:
- # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763
- # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages
- - mkdir build
- - cd build
- - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
- - make --output-sync -j`nproc` check-tcg V=1
-
-ubuntu-18.04-s390x-all:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$S390X_RUNNER_AVAILABLE"
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-18.04-s390x-alldbg:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$S390X_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --enable-debug --disable-libssh
- - make clean
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-18.04-s390x-clang:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$S390X_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-18.04-s390x-tci:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$S390X_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --enable-tcg-interpreter
- - make --output-sync -j`nproc`
-
-ubuntu-18.04-s390x-notcg:
- needs: []
- stage: build
- tags:
- - ubuntu_18.04
- - s390x
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$S390X_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --disable-tcg
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-# All ubuntu-20.04 jobs should run successfully in an environment
-# setup by the scripts/ci/setup/qemu/build-environment.yml task
-# "Install basic packages to build QEMU on Ubuntu 18.04/20.04"
-ubuntu-20.04-aarch64-all-linux-static:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$AARCH64_RUNNER_AVAILABLE"
- script:
- # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763
- # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages
- - mkdir build
- - cd build
- - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
- - make --output-sync -j`nproc` check-tcg V=1
-
-ubuntu-20.04-aarch64-all:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$AARCH64_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-20.04-aarch64-alldbg:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$AARCH64_RUNNER_AVAILABLE"
- script:
- - mkdir build
- - cd build
- - ../configure --enable-debug --disable-libssh
- - make clean
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-20.04-aarch64-clang:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$AARCH64_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --cc=clang-10 --cxx=clang++-10 --enable-sanitizers
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-ubuntu-20.04-aarch64-tci:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$AARCH64_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --enable-tcg-interpreter
- - make --output-sync -j`nproc`
-
-ubuntu-20.04-aarch64-notcg:
- needs: []
- stage: build
- tags:
- - ubuntu_20.04
- - aarch64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
-   when: manual
-   allow_failure: true
- - if: "$AARCH64_RUNNER_AVAILABLE"
-   when: manual
-   allow_failure: true
- script:
- - mkdir build
- - cd build
- - ../configure --disable-libssh --disable-tcg
- - make --output-sync -j`nproc`
- - make --output-sync -j`nproc` check V=1
-
-centos-stream-8-x86_64:
- allow_failure: true
- needs: []
- stage: build
- tags:
- - centos_stream_8
- - x86_64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE"
- artifacts:
-   name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
-   when: on_failure
-   expire_in: 7 days
-   paths:
-     - build/tests/results/latest/results.xml
-     - build/tests/results/latest/test-results
-   reports:
-     junit: build/tests/results/latest/results.xml
- before_script:
- - JOBS=$(expr $(nproc) + 1)
- script:
- - mkdir build
- - cd build
- - ../scripts/ci/org.centos/stream/8/x86_64/configure
- - make -j"$JOBS"
- - make NINJA=":" check
- - ../scripts/ci/org.centos/stream/8/x86_64/test-avocado
+include:
+  - local: '/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml'
+  - local: '/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml'
+  - local: '/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml'
diff --git a/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml
new file mode 100644
index 0000000000..49aa703f55
--- /dev/null
+++ b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml
@@ -0,0 +1,28 @@
+centos-stream-8-x86_64:
+ allow_failure: true
+ needs: []
+ stage: build
+ tags:
+ - centos_stream_8
+ - x86_64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE"
+ artifacts:
+   name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
+   when: on_failure
+   expire_in: 7 days
+   paths:
+     - build/tests/results/latest/results.xml
+     - build/tests/results/latest/test-results
+   reports:
+     junit: build/tests/results/latest/results.xml
+ before_script:
+ - JOBS=$(expr $(nproc) + 1)
+ script:
+ - mkdir build
+ - cd build
+ - ../scripts/ci/org.centos/stream/8/x86_64/configure
+ - make -j"$JOBS"
+ - make NINJA=":" check
+ - ../scripts/ci/org.centos/stream/8/x86_64/test-avocado
diff --git a/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml
new file mode 100644
index 0000000000..f39d874a1e
--- /dev/null
+++ b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml
@@ -0,0 +1,118 @@
+# All ubuntu-18.04 jobs should run successfully in an environment
+# setup by the scripts/ci/setup/build-environment.yml task
+# "Install basic packages to build QEMU on Ubuntu 18.04/20.04"
+
+ubuntu-18.04-s390x-all-linux-static:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$S390X_RUNNER_AVAILABLE"
+ script:
+ # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763
+ # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages
+ - mkdir build
+ - cd build
+ - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+ - make --output-sync -j`nproc` check-tcg V=1
+
+ubuntu-18.04-s390x-all:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$S390X_RUNNER_AVAILABLE"
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-18.04-s390x-alldbg:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+   allow_failure: true
+ - if: "$S390X_RUNNER_AVAILABLE"
+   when: manual
+   allow_failure: true
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --enable-debug --disable-libssh
+ - make clean
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-18.04-s390x-clang:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+   allow_failure: true
+ - if: "$S390X_RUNNER_AVAILABLE"
+   when: manual
+   allow_failure: true
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-18.04-s390x-tci:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+   allow_failure: true
+ - if: "$S390X_RUNNER_AVAILABLE"
+   when: manual
+   allow_failure: true
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --enable-tcg-interpreter
+ - make --output-sync -j`nproc`
+
+ubuntu-18.04-s390x-notcg:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_18.04
+ - s390x
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+   allow_failure: true
+ - if: "$S390X_RUNNER_AVAILABLE"
+   when: manual
+   allow_failure: true
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --disable-tcg
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
diff --git a/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml
new file mode 100644
index 0000000000..920e388bd0
--- /dev/null
+++ b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml
@@ -0,0 +1,118 @@
+# All ubuntu-20.04 jobs should run successfully in an environment
+# setup by the scripts/ci/setup/qemu/build-environment.yml task
+# "Install basic packages to build QEMU on Ubuntu 18.04/20.04"
+
+ubuntu-20.04-aarch64-all-linux-static:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$AARCH64_RUNNER_AVAILABLE"
+ script:
+ # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763
+ # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages
+ - mkdir build
+ - cd build
+ - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+ - make --output-sync -j`nproc` check-tcg V=1
+
+ubuntu-20.04-aarch64-all:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+   allow_failure: true
+ - if: "$AARCH64_RUNNER_AVAILABLE"
+   when: manual
+   allow_failure: true
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-20.04-aarch64-alldbg:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+ - if: "$AARCH64_RUNNER_AVAILABLE"
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --enable-debug --disable-libssh
+ - make clean
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-20.04-aarch64-clang:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+   allow_failure: true
+ - if: "$AARCH64_RUNNER_AVAILABLE"
+   when: manual
+   allow_failure: true
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --cc=clang-10 --cxx=clang++-10 --enable-sanitizers
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1
+
+ubuntu-20.04-aarch64-tci:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+   allow_failure: true
+ - if: "$AARCH64_RUNNER_AVAILABLE"
+   when: manual
+   allow_failure: true
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --enable-tcg-interpreter
+ - make --output-sync -j`nproc`
+
+ubuntu-20.04-aarch64-notcg:
+ needs: []
+ stage: build
+ tags:
+ - ubuntu_20.04
+ - aarch64
+ rules:
+ - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
+   when: manual
+   allow_failure: true
+ - if: "$AARCH64_RUNNER_AVAILABLE"
+   when: manual
+   allow_failure: true
+ script:
+ - mkdir build
+ - cd build
+ - ../configure --disable-libssh --disable-tcg
+ - make --output-sync -j`nproc`
+ - make --output-sync -j`nproc` check V=1

From 9968de0a4a5470bd7b98dcd2fae5d5269908f16b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
Date: Tue, 16 Nov 2021 11:27:57 +0000
Subject: [PATCH 49/76] gitlab: skip cirrus jobs on master and stable branches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On the primary QEMU repository we want the CI jobs to run on the staging
branch as a gating CI test.

Cirrus CI has very limited job concurrency, so if there are too many
jobs triggered they'll queue up and hit the GitLab CI job timeout before
they complete on Cirrus.

If we let Cirrus jobs run again on the master branch immediately after
merging from staging, that just increases the chances jobs will get
queued and subsequently timeout.

The same applies for merges to the stable branches.

User forks meanwhile should be allowed to run Cirrus CI jobs freely.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Willian Rampazzo <willianr@redhat.com>
Message-Id: <20211116112757.1909176-1-berrange@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 .gitlab-ci.d/cirrus.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml
index e7b25e7427..cc2f2e8906 100644
--- a/.gitlab-ci.d/cirrus.yml
+++ b/.gitlab-ci.d/cirrus.yml
@@ -40,6 +40,9 @@
     - cat .gitlab-ci.d/cirrus/$NAME.yml
     - cirrus-run -v --show-build-log always .gitlab-ci.d/cirrus/$NAME.yml
   rules:
+    # Allow on 'staging' branch and 'stable-X.Y-staging' branches only
+    - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH !~ /staging/'
+      when: never
     - if: "$CIRRUS_GITHUB_REPO && $CIRRUS_API_TOKEN"
 
 x64-freebsd-12-build:

From f26bd6ff21df2f1d155ca17eef360423b706ab7f Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 11 Nov 2021 09:37:15 -0500
Subject: [PATCH 50/76] python/aqmp: Fix disconnect during capabilities
 negotiation

If we receive ConnectionResetError (ECONNRESET) while attempting to
perform capabilities negotiation -- prior to the establishment of the
async reader/writer tasks -- the disconnect function is not aware that
we are in an error pathway.

As a result, when attempting to close the StreamWriter, we'll see the
same ConnectionResetError that caused us to initiate a disconnect in the
first place, which will cause the disconnect task itself to fail, which
emits a CRITICAL logging event.

I still don't know if there's a smarter way to check to see if an
exception received at this point is "the same" exception as the one that
caused the initial disconnect, but for now the problem can be avoided by
improving the error pathway detection in the exit path.

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Message-id: 20211111143719.2162525-2-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
---
 python/qemu/aqmp/protocol.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/python/qemu/aqmp/protocol.py b/python/qemu/aqmp/protocol.py
index ae1df24026..860b79512d 100644
--- a/python/qemu/aqmp/protocol.py
+++ b/python/qemu/aqmp/protocol.py
@@ -623,13 +623,21 @@ class AsyncProtocol(Generic[T]):
         def _done(task: Optional['asyncio.Future[Any]']) -> bool:
             return task is not None and task.done()
 
-        # NB: We can't rely on _bh_tasks being done() here, it may not
-        #     yet have had a chance to run and gather itself.
+        # Are we already in an error pathway? If either of the tasks are
+        # already done, or if we have no tasks but a reader/writer; we
+        # must be.
+        #
+        # NB: We can't use _bh_tasks to check for premature task
+        # completion, because it may not yet have had a chance to run
+        # and gather itself.
         tasks = tuple(filter(None, (self._writer_task, self._reader_task)))
         error_pathway = _done(self._reader_task) or _done(self._writer_task)
+        if not tasks:
+            error_pathway |= bool(self._reader) or bool(self._writer)
 
         try:
-            # Try to flush the writer, if possible:
+            # Try to flush the writer, if possible.
+            # This *may* cause an error and force us over into the error path.
             if not error_pathway:
                 await self._bh_flush_writer()
         except BaseException as err:
@@ -639,7 +647,7 @@ class AsyncProtocol(Generic[T]):
             self.logger.debug("%s:\n%s\n", emsg, pretty_traceback())
             raise
         finally:
-            # Cancel any still-running tasks:
+            # Cancel any still-running tasks (Won't raise):
             if self._writer_task is not None and not self._writer_task.done():
                 self.logger.debug("Cancelling writer task.")
                 self._writer_task.cancel()
@@ -652,7 +660,7 @@ class AsyncProtocol(Generic[T]):
                 self.logger.debug("Waiting for tasks to complete ...")
                 await asyncio.wait(tasks)
 
-            # Lastly, close the stream itself. (May raise):
+            # Lastly, close the stream itself. (*May raise*!):
             await self._bh_close_stream(error_pathway)
             self.logger.debug("Disconnected.")
 

From 25de7f50121f251c45d111582d786db7ce0768d3 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 11 Nov 2021 09:37:16 -0500
Subject: [PATCH 51/76] python/aqmp: fix ConnectError string method

When ConnectError is used to wrap an Exception that was initialized
without an error message, we are treated to a traceback with a rubbish
line like this:

... ConnectError: Failed to establish session:

Correct this to use the name of an exception as a fallback message:

... ConnectError: Failed to establish session: EOFError

Better!

Signed-off-by: John Snow <jsnow@redhat.com>
Reported-by: Thomas Huth <thuth@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Message-id: 20211111143719.2162525-3-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
---
 python/qemu/aqmp/protocol.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/qemu/aqmp/protocol.py b/python/qemu/aqmp/protocol.py
index 860b79512d..5190b33b13 100644
--- a/python/qemu/aqmp/protocol.py
+++ b/python/qemu/aqmp/protocol.py
@@ -79,7 +79,11 @@ class ConnectError(AQMPError):
         self.exc: Exception = exc
 
     def __str__(self) -> str:
-        return f"{self.error_message}: {self.exc!s}"
+        cause = str(self.exc)
+        if not cause:
+            # If there's no error string, use the exception name.
+            cause = exception_summary(self.exc)
+        return f"{self.error_message}: {cause}"
 
 
 class StateError(AQMPError):

From 47b43acd57e6e0b2910683f00a758adbc418dde6 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 11 Nov 2021 09:37:17 -0500
Subject: [PATCH 52/76] scripts/device-crash-test: simplify Exception handling

We don't need to handle KeyboardInterruptError specifically; we can
instead tighten the scope of the broad Exception handlers to only catch
"Exception", which has the effect of allowing all BaseException classes
that do not inherit from Exception to be raised through.

KeyboardInterruptError and a few other important ones are
BaseExceptions, so this does the same thing with less code.

Signed-off-by: John Snow <jsnow@redhat.com>
Reported-by: Thomas Huth <thuth@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Message-id: 20211111143719.2162525-4-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
---
 scripts/device-crash-test | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/scripts/device-crash-test b/scripts/device-crash-test
index 8331c057b8..d91e8616ef 100755
--- a/scripts/device-crash-test
+++ b/scripts/device-crash-test
@@ -317,9 +317,7 @@ class QemuBinaryInfo(object):
         try:
             vm.launch()
             mi['runnable'] = True
-        except KeyboardInterrupt:
-            raise
-        except:
+        except Exception:
             dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info())
             dbg("log: %r", vm.get_log())
             mi['runnable'] = False
@@ -360,9 +358,7 @@ def checkOneCase(args, testcase):
     exc_traceback = None
     try:
         vm.launch()
-    except KeyboardInterrupt:
-        raise
-    except:
+    except Exception:
         exc_traceback = traceback.format_exc()
         dbg("Exception while running test case")
     finally:

From 76f86e78b255d17aad2ebd1177069c86f08039ef Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 11 Nov 2021 09:37:18 -0500
Subject: [PATCH 53/76] scripts/device-crash-test: don't emit AQMP connection
 errors to stdout

These errors are expected, so they shouldn't clog up terminal output. In
the event that they're *not* expected, we'll be seeing an awful lot more
output concerning the nature of the failure.

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Message-id: 20211111143719.2162525-5-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
---
 scripts/device-crash-test | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/device-crash-test b/scripts/device-crash-test
index d91e8616ef..49bcd61b4f 100755
--- a/scripts/device-crash-test
+++ b/scripts/device-crash-test
@@ -499,6 +499,12 @@ def main():
         lvl = logging.WARN
     logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')
 
+    if not args.debug:
+        # Async QMP, when in use, is chatty about connection failures.
+        # This script knowingly generates a ton of connection errors.
+        # Silence this logger.
+        logging.getLogger('qemu.aqmp.qmp_client').setLevel(logging.CRITICAL)
+
     fatal_failures = []
     wl_stats = {}
     skipped = 0

From c398a241ec4138e0b995a0215dea84ca93b0384f Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Thu, 11 Nov 2021 09:37:19 -0500
Subject: [PATCH 54/76] scripts/device-crash-test: hide tracebacks for QMP
 connect errors

Generally, the traceback for a connection failure is uninteresting and
all we need to know is that the connection attempt failed.

Reduce the verbosity in these cases, except when debugging.

Signed-off-by: John Snow <jsnow@redhat.com>
Reported-by: Thomas Huth <thuth@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Message-id: 20211111143719.2162525-6-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
---
 scripts/device-crash-test | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/scripts/device-crash-test b/scripts/device-crash-test
index 49bcd61b4f..3db0ffe5b8 100755
--- a/scripts/device-crash-test
+++ b/scripts/device-crash-test
@@ -36,6 +36,7 @@ from itertools import chain
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
 from qemu.machine import QEMUMachine
+from qemu.aqmp import ConnectError
 
 logger = logging.getLogger('device-crash-test')
 dbg = logger.debug
@@ -355,10 +356,12 @@ def checkOneCase(args, testcase):
     dbg("will launch QEMU: %s", cmdline)
     vm = QEMUMachine(binary=binary, args=args)
 
+    exc = None
     exc_traceback = None
     try:
         vm.launch()
-    except Exception:
+    except Exception as this_exc:
+        exc = this_exc
         exc_traceback = traceback.format_exc()
         dbg("Exception while running test case")
     finally:
@@ -366,8 +369,9 @@ def checkOneCase(args, testcase):
         ec = vm.exitcode()
         log = vm.get_log()
 
-    if exc_traceback is not None or ec != 0:
-        return {'exc_traceback':exc_traceback,
+    if exc is not None or ec != 0:
+        return {'exc': exc,
+                'exc_traceback':exc_traceback,
                 'exitcode':ec,
                 'log':log,
                 'testcase':testcase,
@@ -455,6 +459,17 @@ def logFailure(f, level):
     for l in f['log'].strip().split('\n'):
         logger.log(level, "log: %s", l)
     logger.log(level, "exit code: %r", f['exitcode'])
+
+    # If the Exception is merely a QMP connect error,
+    # reduce the logging level for its traceback to
+    # improve visual clarity.
+    if isinstance(f.get('exc'), ConnectError):
+        logger.log(level, "%s.%s: %s",
+                   type(f['exc']).__module__,
+                   type(f['exc']).__qualname__,
+                   str(f['exc']))
+        level = logging.DEBUG
+
     if f['exc_traceback']:
         logger.log(level, "exception:")
         for l in f['exc_traceback'].split('\n'):

From 67f9968ce3f0847ffddb6ee2837a3641acd92abf Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Tue, 16 Nov 2021 21:07:31 +0100
Subject: [PATCH 55/76] Update version for v6.2.0-rc1 release

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 8de48381d2..61d04b724a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-6.1.90
+6.2.91

From fe644e8ebb8534ccc2f77da27cdc7911132f0bd1 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 15 Oct 2021 14:42:19 +0200
Subject: [PATCH 56/76] target/s390x/cpu.h: Remove unused SIGP_MODE defines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These are unused since commit 075e52b816648f21 ("s390x/cpumodel:
we are always in zarchitecture mode") and it's unlikely that we
will ever need them again. So let's simply remove them now.

Message-Id: <20211015124219.1330830-1-thuth@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
 target/s390x/cpu.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 3153d053e9..ca3845d023 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -674,11 +674,6 @@ QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
 #define SIGP_STAT_INVALID_ORDER     0x00000002UL
 #define SIGP_STAT_RECEIVER_CHECK    0x00000001UL
 
-/* SIGP SET ARCHITECTURE modes */
-#define SIGP_MODE_ESA_S390 0
-#define SIGP_MODE_Z_ARCH_TRANS_ALL_PSW 1
-#define SIGP_MODE_Z_ARCH_TRANS_CUR_PSW 2
-
 /* SIGP order code mask corresponding to bit positions 56-63 */
 #define SIGP_ORDER_MASK 0x000000ff
 

From 0c8c45140c8494a3b6fd36946e437dacac2573b8 Mon Sep 17 00:00:00 2001
From: Kashyap Chamarthy <kchamart@redhat.com>
Date: Wed, 10 Nov 2021 15:49:00 +0100
Subject: [PATCH 57/76] docs: rSTify the "TrivialPatches" wiki
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The original wiki is here[1].  I converted by copying the wiki source
into a .wiki file and convert to rST using `pandoc`:

        $ pandoc -f Mediawiki -t rst trivial-patches.wiki -o trivial-patches.rst

Update the active maintainer names (and drop Michael Tokarev's inactive
repo) to reflect current reality.

[1] https://wiki.qemu.org/Contribute/TrivialPatches

Signed-off-by: Kashyap Chamarthy <kchamart@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211110144902.388183-2-kchamart@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
 docs/devel/index.rst           |  1 +
 docs/devel/trivial-patches.rst | 50 ++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)
 create mode 100644 docs/devel/trivial-patches.rst

diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index 7c25177c5d..dbde1c44e9 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -45,3 +45,4 @@ modifying QEMU's source code.
    vfio-migration
    qapi-code-gen
    writing-monitor-commands
+   trivial-patches
diff --git a/docs/devel/trivial-patches.rst b/docs/devel/trivial-patches.rst
new file mode 100644
index 0000000000..db3f2001da
--- /dev/null
+++ b/docs/devel/trivial-patches.rst
@@ -0,0 +1,50 @@
+Trivial Patches
+===============
+
+Overview
+--------
+
+Trivial patches that change just a few lines of code sometimes languish
+on the mailing list even though they require only a small amount of
+review. This is often the case for patches that do not fall under an
+actively maintained subsystem and therefore fall through the cracks.
+
+The trivial patches team take on the task of reviewing and building pull
+requests for patches that:
+
+- Do not fall under an actively maintained subsystem.
+- Are single patches or short series (max 2-4 patches).
+- Only touch a few lines of code.
+
+**You should hint that your patch is a candidate by CCing
+qemu-trivial@nongnu.org.**
+
+Repositories
+------------
+
+Since the trivial patch team rotates maintainership there is only one
+active repository at a time:
+
+- git://github.com/vivier/qemu.git trivial-patches - `browse <https://github.com/vivier/qemu/tree/trivial-patches>`__
+
+Workflow
+--------
+
+The trivial patches team rotates the duty of collecting trivial patches
+amongst its members. A team member's job is to:
+
+1. Identify trivial patches on the development mailing list.
+2. Review trivial patches, merge them into a git tree, and reply to state
+   that the patch is queued.
+3. Send pull requests to the development mailing list once a week.
+
+A single team member can be on duty as long as they like. The suggested
+time is 1 week before handing off to the next member.
+
+Team
+----
+
+If you would like to join the trivial patches team, contact Laurent
+Vivier. The current team includes:
+
+- `Laurent Vivier <mailto:laurent@vivier.eu>`__

From 0ff0dcf6b5292e044985c38cbd83a57485ca887c Mon Sep 17 00:00:00 2001
From: Kashyap Chamarthy <kchamart@redhat.com>
Date: Wed, 10 Nov 2021 15:49:01 +0100
Subject: [PATCH 58/76] docs: rSTify the "SubmitAPullRequest" wiki
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The original wiki is here[1].  I converted by copying the wiki source
into a .wiki file and convert to rST using `pandoc`:

    $ pandoc -f Mediawiki -t rst submitting-a-pull-request.wiki \
        -o submitting-a-pull-request.rst

This is a 1-1 conversion; no content changes.

[1] https://wiki.qemu.org/Contribute/SubmitAPullRequest

Signed-off-by: Kashyap Chamarthy <kchamart@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211110144902.388183-3-kchamart@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
 docs/devel/index.rst                     |  1 +
 docs/devel/submitting-a-pull-request.rst | 76 ++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 docs/devel/submitting-a-pull-request.rst

diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index dbde1c44e9..211f8876da 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -46,3 +46,4 @@ modifying QEMU's source code.
    qapi-code-gen
    writing-monitor-commands
    trivial-patches
+   submitting-a-pull-request
diff --git a/docs/devel/submitting-a-pull-request.rst b/docs/devel/submitting-a-pull-request.rst
new file mode 100644
index 0000000000..8729d29036
--- /dev/null
+++ b/docs/devel/submitting-a-pull-request.rst
@@ -0,0 +1,76 @@
+Submit a Pull Request
+=====================
+
+QEMU welcomes contributions of code, but we generally expect these to be
+sent as simple patch emails to the mailing list (see our page on
+`submitting a patch
+<https://qemu-project.gitlab.io/qemu/devel/submitting-a-patch.html>`__
+for more details).  Generally only existing submaintainers of a tree
+will need to submit pull requests, although occasionally for a large
+patch series we might ask a submitter to send a pull request. This page
+documents our recommendations on pull requests for those people.
+
+A good rule of thumb is not to send a pull request unless somebody asks
+you to.
+
+**Resend the patches with the pull request** as emails which are
+threaded as follow-ups to the pull request itself. The simplest way to
+do this is to use ``git format-patch --cover-letter`` to create the
+emails, and then edit the cover letter to include the pull request
+details that ``git request-pull`` outputs.
+
+**Use PULL as the subject line tag** in both the cover letter and the
+retransmitted patch mails (for example, by using
+``--subject-prefix=PULL`` in your ``git format-patch`` command). This
+helps people to filter in or out the resulting emails (especially useful
+if they are only CC'd on one email out of the set).
+
+**Each patch must have your own Signed-off-by: line** as well as that of
+the original author if the patch was not written by you. This is because
+with a pull request you're now indicating that the patch has passed via
+you rather than directly from the original author.
+
+**Don't forget to add Reviewed-by: and Acked-by: lines**. When other
+people have reviewed the patches you're putting in the pull request,
+make sure you've copied their signoffs across. (If you use the `patches
+tool <https://github.com/stefanha/patches>`__ to add patches from email
+directly to your git repo it will include the tags automatically; if
+you're updating patches manually or in some other way you'll need to
+edit the commit messages by hand.)
+
+**Don't send pull requests for code that hasn't passed review**. A pull
+request says these patches are ready to go into QEMU now, so they must
+have passed the standard code review processes. In particular if you've
+corrected issues in one round of code review, you need to send your
+fixed patch series as normal to the list; you can't put it in a pull
+request until it's gone through. (Extremely trivial fixes may be OK to
+just fix in passing, but if in doubt err on the side of not.)
+
+**Test before sending**. This is an obvious thing to say, but make sure
+everything builds (including that it compiles at each step of the patch
+series) and that "make check" passes before sending out the pull
+request. As a submaintainer you're one of QEMU's lines of defense
+against bad code, so double check the details.
+
+**All pull requests must be signed**. If your key is not already signed
+by members of the QEMU community, you should make arrangements to attend
+a `KeySigningParty <https://wiki.qemu.org/KeySigningParty>`__ (for
+example at KVM Forum) or make alternative arrangements to have your key
+signed by an attendee.  Key signing requires meeting another community
+member \*in person\* so please make appropriate arrangements.  By
+"signed" here we mean that the pullreq email should quote a tag which is
+a GPG-signed tag (as created with 'gpg tag -s ...').
+
+**Pull requests not for master should say "not for master" and have
+"PULL SUBSYSTEM whatever" in the subject tag**. If your pull request is
+targeting a stable branch or some submaintainer tree, please include the
+string "not for master" in the cover letter email, and make sure the
+subject tag is "PULL SUBSYSTEM s390/block/whatever" rather than just
+"PULL". This allows it to be automatically filtered out of the set of
+pull requests that should be applied to master.
+
+You might be interested in the `make-pullreq
+<https://git.linaro.org/people/peter.maydell/misc-scripts.git/tree/make-pullreq>`__
+script which automates some of this process for you and includes a few
+sanity checks. Note that you must edit it to configure it suitably for
+your local situation!

From 9f73de8df0335c9387f4ee39e207a65a1615676f Mon Sep 17 00:00:00 2001
From: Kashyap Chamarthy <kchamart@redhat.com>
Date: Wed, 10 Nov 2021 15:49:02 +0100
Subject: [PATCH 59/76] docs: rSTify the "SubmitAPatch" wiki
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- The original wiki is here[1]. I copied the wiki source[2] into a .wiki
  file, and used `pandoc` to convert it to rST:

    $> pandoc -f Mediawiki -t rst submitting-a-patch.wiki -o
       submitting-a-patch.rst

- The only minor touch-ups I did was to fix URLs.  But 99%, it is a 1-1
  conversion.

  (An example of a "touch-up": under the section "Patch emails must
  include a Signed-off-by: line", I updated the "see SubmittingPatches
  1.12"  to "1.12) Sign your work")

- I have also converted a couple other related wiki pages (included in
  this patch series) that were hyperlinked within the SubmitAPatch page,
  or a page that it refers to:

  - SubmitAPullRequest: https://wiki.qemu.org/Contribute/SubmitAPullRequest
  - TrivialPatches: https://wiki.qemu.org/Contribute/TrivialPatches

- Over time, many people contributed to this wiki page; you can find all
  the authors in the wiki history[3].

[1] https://wiki.qemu.org/Contribute/SubmitAPatch
[2] http://wiki.qemu.org/index.php?title=Contribute/SubmitAPatch&action=edit
[3] http://wiki.qemu.org/index.php?title=Contribute/SubmitAPatch&action=history

Signed-off-by: Kashyap Chamarthy <kchamart@redhat.com>
Message-Id: <20211110144902.388183-4-kchamart@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
[thuth: Cosmetic fixes]
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
 docs/devel/index.rst              |   1 +
 docs/devel/submitting-a-patch.rst | 456 ++++++++++++++++++++++++++++++
 2 files changed, 457 insertions(+)
 create mode 100644 docs/devel/submitting-a-patch.rst

diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index 211f8876da..afd937535e 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -46,4 +46,5 @@ modifying QEMU's source code.
    qapi-code-gen
    writing-monitor-commands
    trivial-patches
+   submitting-a-patch
    submitting-a-pull-request
diff --git a/docs/devel/submitting-a-patch.rst b/docs/devel/submitting-a-patch.rst
new file mode 100644
index 0000000000..6fefc67d52
--- /dev/null
+++ b/docs/devel/submitting-a-patch.rst
@@ -0,0 +1,456 @@
+Submitting a Patch
+==================
+
+QEMU welcomes contributions of code (either fixing bugs or adding new
+functionality). However, we get a lot of patches, and so we have some
+guidelines about submitting patches. If you follow these, you'll help
+make our task of code review easier and your patch is likely to be
+committed faster.
+
+This page seems very long, so if you are only trying to post a quick
+one-shot fix, the bare minimum we ask is that:
+
+-  You **must** provide a Signed-off-by: line (this is a hard
+   requirement because it's how you say "I'm legally okay to contribute
+   this and happy for it to go into QEMU", modeled after the `Linux kernel
+   <http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches?id=f6f94e2ab1b33f0082ac22d71f66385a60d8157f#n297>`__
+   policy.) ``git commit -s`` or ``git format-patch -s`` will add one.
+-  All contributions to QEMU must be **sent as patches** to the
+   qemu-devel `mailing list <MailingLists>`__. Patch contributions
+   should not be posted on the bug tracker, posted on forums, or
+   externally hosted and linked to. (We have other mailing lists too,
+   but all patches must go to qemu-devel, possibly with a Cc: to another
+   list.) ``git send-email`` works best for delivering the patch without
+   mangling it (`hints for setting it
+   up <http://lxr.free-electrons.com/source/Documentation/process/email-clients.rst>`__),
+   but attachments can be used as a last resort on a first-time
+   submission.
+-  You must read replies to your message, and be willing to act on them.
+   Note, however, that maintainers are often willing to manually fix up
+   first-time contributions, since there is a learning curve involved in
+   making an ideal patch submission.
+
+You do not have to subscribe to post (list policy is to reply-to-all to
+preserve CCs and keep non-subscribers in the loop on the threads they
+start), although you may find it easier as a subscriber to pick up good
+ideas from other posts. If you do subscribe, be prepared for a high
+volume of email, often over one thousand messages in a week. The list is
+moderated; first-time posts from an email address (whether or not you
+subscribed) may be subject to some delay while waiting for a moderator
+to whitelist your address.
+
+The larger your contribution is, or if you plan on becoming a long-term
+contributor, then the more important the rest of this page becomes.
+Reading the table of contents below should already give you an idea of
+the basic requirements. Use the table of contents as a reference, and
+read the parts that you have doubts about.
+
+.. _writing_your_patches:
+
+Writing your Patches
+--------------------
+
+.. _use_the_qemu_coding_style:
+
+Use the QEMU coding style
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can run run *scripts/checkpatch.pl <patchfile>* before submitting to
+check that you are in compliance with our coding standards. Be aware
+that ``checkpatch.pl`` is not infallible, though, especially where C
+preprocessor macros are involved; use some common sense too. See also:
+
+- `QEMU Coding Style
+  <https://qemu-project.gitlab.io/qemu/devel/style.html>`__
+
+-  `Automate a checkpatch run on
+   commit <http://blog.vmsplice.net/2011/03/how-to-automatically-run-checkpatchpl.html>`__
+
+.. _base_patches_against_current_git_master:
+
+Base patches against current git master
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There's no point submitting a patch which is based on a released version
+of QEMU because development will have moved on from then and it probably
+won't even apply to master. We only apply selected bugfixes to release
+branches and then only as backports once the code has gone into master.
+
+.. _split_up_long_patches:
+
+Split up long patches
+~~~~~~~~~~~~~~~~~~~~~
+
+Split up longer patches into a patch series of logical code changes.
+Each change should compile and execute successfully. For instance, don't
+add a file to the makefile in patch one and then add the file itself in
+patch two. (This rule is here so that people can later use tools like
+`git bisect <http://git-scm.com/docs/git-bisect>`__ without hitting
+points in the commit history where QEMU doesn't work for reasons
+unrelated to the bug they're chasing.) Put documentation first, not
+last, so that someone reading the series can do a clean-room evaluation
+of the documentation, then validate that the code matched the
+documentation. A commit message that mentions "Also, ..." is often a
+good candidate for splitting into multiple patches. For more thoughts on
+properly splitting patches and writing good commit messages, see `this
+advice from
+OpenStack <https://wiki.openstack.org/wiki/GitCommitMessages>`__.
+
+.. _make_code_motion_patches_easy_to_review:
+
+Make code motion patches easy to review
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If a series requires large blocks of code motion, there are tricks for
+making the refactoring easier to review. Split up the series so that
+semantic changes (or even function renames) are done in a separate patch
+from the raw code motion. Use a one-time setup of
+``git config diff.renames true; git config diff.algorithm patience``
+(Refer to `git-config <http://git-scm.com/docs/git-config>`__.)  The
+``diff.renames`` property ensures file rename patches will be given in a
+more compact representation that focuses only on the differences across
+the file rename, instead of showing the entire old file as a deletion
+and the new file as an insertion. Meanwhile, the 'diff.algorithm'
+property ensures that extracting a non-contiguous subset of one file
+into a new file, but where all extracted parts occur in the same order
+both before and after the patch, will reduce churn in trying to treat
+unrelated ``}`` lines in the original file as separating hunks of
+changes.
+
+Ideally, a code motion patch can be reviewed by doing::
+
+    git format-patch --stdout -1 > patch;
+    diff -u <(sed -n 's/^-//p' patch) <(sed -n 's/^\+//p' patch)
+
+to focus on the few changes that weren't wholesale code motion.
+
+.. _dont_include_irrelevant_changes:
+
+Don't include irrelevant changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In particular, don't include formatting, coding style or whitespace
+changes to bits of code that would otherwise not be touched by the
+patch. (It's OK to fix coding style issues in the immediate area (few
+lines) of the lines you're changing.) If you think a section of code
+really does need a reindent or other large-scale style fix, submit this
+as a separate patch which makes no semantic changes; don't put it in the
+same patch as your bug fix.
+
+For smaller patches in less frequently changed areas of QEMU, consider
+using the `trivial patches process
+<https://qemu-project.gitlab.io/qemu/devel/style.html>`__.
+
+.. _write_a_meaningful_commit_message:
+
+Write a meaningful commit message
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Commit messages should be meaningful and should stand on their own as a
+historical record of why the changes you applied were necessary or
+useful.
+
+QEMU follows the usual standard for git commit messages: the first line
+(which becomes the email subject line) is "subsystem: single line
+summary of change". Whether the "single line summary of change" starts
+with a capital is a matter of taste, but we prefer that the summary does
+not end in ".". Look at ``git shortlog -30`` for an idea of sample
+subject lines. Then there is a blank line and a more detailed
+description of the patch, another blank and your Signed-off-by: line.
+Please do not use lines that are longer than 76 characters in your
+commit message (so that the text still shows up nicely with "git show"
+in a 80-columns terminal window).
+
+The body of the commit message is a good place to document why your
+change is important. Don't include comments like "This is a suggestion
+for fixing this bug" (they can go below the ``---`` line in the email so
+they don't go into the final commit message). Make sure the body of the
+commit message can be read in isolation even if the reader's mailer
+displays the subject line some distance apart (that is, a body that
+starts with "... so that" as a continuation of the subject line is
+harder to follow).
+
+.. _submitting_your_patches:
+
+Submitting your Patches
+-----------------------
+
+.. _cc_the_relevant_maintainer:
+
+CC the relevant maintainer
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Send patches both to the mailing list and CC the maintainer(s) of the
+files you are modifying. look in the MAINTAINERS file to find out who
+that is. Also try using scripts/get_maintainer.pl from the repository
+for learning the most common committers for the files you touched.
+
+Example::
+
+    ~/src/qemu/scripts/get_maintainer.pl -f hw/ide/core.c
+
+In fact, you can automate this, via a one-time setup of ``git config
+sendemail.cccmd 'scripts/get_maintainer.pl --nogit-fallback'`` (Refer to
+`git-config <http://git-scm.com/docs/git-config>`__.)
+
+.. _do_not_send_as_an_attachment:
+
+Do not send as an attachment
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Send patches inline so they are easy to reply to with review comments.
+Do not put patches in attachments.
+
+.. _use_git_format_patch:
+
+Use ``git format-patch``
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Use the right diff format.
+`git format-patch <http://git-scm.com/docs/git-format-patch>`__ will
+produce patch emails in the right format (check the documentation to
+find out how to drive it). You can then edit the cover letter before
+using ``git send-email`` to mail the files to the mailing list. (We
+recommend `git send-email <http://git-scm.com/docs/git-send-email>`__
+because mail clients often mangle patches by wrapping long lines or
+messing up whitespace. Some distributions do not include send-email in a
+default install of git; you may need to download additional packages,
+such as 'git-email' on Fedora-based systems.) Patch series need a cover
+letter, with shallow threading (all patches in the series are
+in-reply-to the cover letter, but not to each other); single unrelated
+patches do not need a cover letter (but if you do send a cover letter,
+use --numbered so the cover and the patch have distinct subject lines).
+Patches are easier to find if they start a new top-level thread, rather
+than being buried in-reply-to another existing thread.
+
+.. _patch_emails_must_include_a_signed_off_by_line:
+
+Patch emails must include a ``Signed-off-by:`` line
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For more information see `1.12) Sign your work
+<http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches?id=f6f94e2ab1b33f0082ac22d71f66385a60d8157f#n296>`__.
+This is vital or we will not be able to apply your patch! Please use
+your real name to sign a patch (not an alias or acronym).
+
+If you wrote the patch, make sure your "From:" and "Signed-off-by:"
+lines use the same spelling. It's okay if you subscribe or contribute to
+the list via more than one address, but using multiple addresses in one
+commit just confuses things. If someone else wrote the patch, git will
+include a "From:" line in the body of the email (different from your
+envelope From:) that will give credit to the correct author; but again,
+that author's Signed-off-by: line is mandatory, with the same spelling.
+
+.. _include_a_meaningful_cover_letter:
+
+Include a meaningful cover letter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This usually applies only to a series that includes multiple patches;
+the cover letter explains the overall goal of such a series.
+
+When reviewers don't know your goal at the start of their review, they
+may object to early changes that don't make sense until the end of the
+series, because they do not have enough context yet at that point of
+their review. A series where the goal is unclear also risks a higher
+number of review-fix cycles because the reviewers haven't bought into
+the idea yet. If the cover letter can explain these points to the
+reviewer, the process will be smoother patches will get merged faster.
+Make sure your cover letter includes a diffstat of changes made over the
+entire series; potential reviewers know what files they are interested
+in, and they need an easy way determine if your series touches them.
+
+.. _use_the_rfc_tag_if_needed:
+
+Use the RFC tag if needed
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For example, "[PATCH RFC v2]". ``git format-patch --subject-prefix=RFC``
+can help.
+
+"RFC" means "Request For Comments" and is a statement that you don't
+intend for your patchset to be applied to master, but would like some
+review on it anyway. Reasons for doing this include:
+
+-  the patch depends on some pending kernel changes which haven't yet
+   been accepted, so the QEMU patch series is blocked until that
+   dependency has been dealt with, but is worth reviewing anyway
+-  the patch set is not finished yet (perhaps it doesn't cover all use
+   cases or work with all targets) but you want early review of a major
+   API change or design structure before continuing
+
+In general, since it's asking other people to do review work on a
+patchset that the submitter themselves is saying shouldn't be applied,
+it's best to:
+
+-  use it sparingly
+-  in the cover letter, be clear about why a patch is an RFC, what areas
+   of the patchset you're looking for review on, and why reviewers
+   should care
+
+.. _participating_in_code_review:
+
+Participating in Code Review
+----------------------------
+
+All patches submitted to the QEMU project go through a code review
+process before they are accepted. Some areas of code that are well
+maintained may review patches quickly, lesser-loved areas of code may
+have a longer delay.
+
+.. _stay_around_to_fix_problems_raised_in_code_review:
+
+Stay around to fix problems raised in code review
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Not many patches get into QEMU straight away -- it is quite common that
+developers will identify bugs, or suggest a cleaner approach, or even
+just point out code style issues or commit message typos. You'll need to
+respond to these, and then send a second version of your patches with
+the issues fixed. This takes a little time and effort on your part, but
+if you don't do it then your changes will never get into QEMU. It's also
+just polite -- it is quite disheartening for a developer to spend time
+reviewing your code and suggesting improvements, only to find that
+you're not going to do anything further and it was all wasted effort.
+
+When replying to comments on your patches **reply to all and not just
+the sender** -- keeping discussion on the mailing list means everybody
+can follow it.
+
+.. _pay_attention_to_review_comments:
+
+Pay attention to review comments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Someone took their time to review your work, and it pays to respect that
+effort; repeatedly submitting a series without addressing all comments
+from the previous round tends to alienate reviewers and stall your
+patch. Reviewers aren't always perfect, so it is okay if you want to
+argue that your code was correct in the first place instead of blindly
+doing everything the reviewer asked. On the other hand, if someone
+pointed out a potential issue during review, then even if your code
+turns out to be correct, it's probably a sign that you should improve
+your commit message and/or comments in the code explaining why the code
+is correct.
+
+If you fix issues that are raised during review **resend the entire
+patch series** not just the one patch that was changed. This allows
+maintainers to easily apply the fixed series without having to manually
+identify which patches are relevant. Send the new version as a complete
+fresh email or series of emails -- don't try to make it a followup to
+version 1. (This helps automatic patch email handling tools distinguish
+between v1 and v2 emails.)
+
+.. _when_resending_patches_add_a_version_tag:
+
+When resending patches add a version tag
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+All patches beyond the first version should include a version tag -- for
+example, "[PATCH v2]". This means people can easily identify whether
+they're looking at the most recent version. (The first version of a
+patch need not say "v1", just [PATCH] is sufficient.) For patch series,
+the version applies to the whole series -- even if you only change one
+patch, you resend the entire series and mark it as "v2". Don't try to
+track versions of different patches in the series separately.  `git
+format-patch <http://git-scm.com/docs/git-format-patch>`__ and `git
+send-email <http://git-scm.com/docs/git-send-email>`__ both understand
+the ``-v2`` option to make this easier. Send each new revision as a new
+top-level thread, rather than burying it in-reply-to an earlier
+revision, as many reviewers are not looking inside deep threads for new
+patches.
+
+.. _include_version_history_in_patchset_revisions:
+
+Include version history in patchset revisions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For later versions of patches, include a summary of changes from
+previous versions, but not in the commit message itself. In an email
+formatted as a git patch, the commit message is the part above the "---"
+line, and this will go into the git changelog when the patch is
+committed. This part should be a self-contained description of what this
+version of the patch does, written to make sense to anybody who comes
+back to look at this commit in git in six months' time. The part below
+the "---" line and above the patch proper (git format-patch puts the
+diffstat here) is a good place to put remarks for people reading the
+patch email, and this is where the "changes since previous version"
+summary belongs. The
+`git-publish <https://github.com/stefanha/git-publish>`__ script can
+help with tracking a good summary across versions. Also, the
+`git-backport-diff <https://github.com/codyprime/git-scripts>`__ script
+can help focus reviewers on what changed between revisions.
+
+.. _tips_and_tricks:
+
+Tips and Tricks
+---------------
+
+.. _proper_use_of_reviewed_by_tags_can_aid_review:
+
+Proper use of Reviewed-by: tags can aid review
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When reviewing a large series, a reviewer can reply to some of the
+patches with a Reviewed-by tag, stating that they are happy with that
+patch in isolation (sometimes conditional on minor cleanup, like fixing
+whitespace, that doesn't affect code content). You should then update
+those commit messages by hand to include the Reviewed-by tag, so that in
+the next revision, reviewers can spot which patches were already clean
+from the previous round. Conversely, if you significantly modify a patch
+that was previously reviewed, remove the reviewed-by tag out of the
+commit message, as well as listing the changes from the previous
+version, to make it easier to focus a reviewer's attention to your
+changes.
+
+.. _if_your_patch_seems_to_have_been_ignored:
+
+If your patch seems to have been ignored
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If your patchset has received no replies you should "ping" it after a
+week or two, by sending an email as a reply-to-all to the patch mail,
+including the word "ping" and ideally also a link to the page for the
+patch on
+`patchwork <http://patchwork.ozlabs.org/project/qemu-devel/list/>`__ or
+GMANE. It's worth double-checking for reasons why your patch might have
+been ignored (forgot to CC the maintainer? annoyed people by failing to
+respond to review comments on an earlier version?), but often for
+less-maintained areas of QEMU patches do just slip through the cracks.
+If your ping is also ignored, ping again after another week or so. As
+the submitter, you are the person with the most motivation to get your
+patch applied, so you have to be persistent.
+
+.. _is_my_patch_in:
+
+Is my patch in?
+~~~~~~~~~~~~~~~
+
+Once your patch has had enough review on list, the maintainer for that
+area of code will send notification to the list that they are including
+your patch in a particular staging branch. Periodically, the maintainer
+then sends a `pull request
+<https://qemu-project.gitlab.io/qemu/devel/submitting-a-pull-request.html>`__
+for aggregating topic branches into mainline qemu. Generally, you do not
+need to send a pull request unless you have contributed enough patches
+to become a maintainer over a particular section of code. Maintainers
+may further modify your commit, by resolving simple merge conflicts or
+fixing minor typos pointed out during review, but will always add a
+Signed-off-by line in addition to yours, indicating that it went through
+their tree. Occasionally, the maintainer's pull request may hit more
+difficult merge conflicts, where you may be requested to help rebase and
+resolve the problems. It may take a couple of weeks between when your
+patch first had a positive review to when it finally lands in qemu.git;
+release cycle freezes may extend that time even longer.
+
+.. _return_the_favor:
+
+Return the favor
+~~~~~~~~~~~~~~~~
+
+Peer review only works if everyone chips in a bit of review time. If
+everyone submitted more patches than they reviewed, we would have a
+patch backlog. A good goal is to try to review at least as many patches
+from others as what you submit. Don't worry if you don't know the code
+base as well as a maintainer; it's perfectly fine to admit when your
+review is weak because you are unfamiliar with the code.

From edcc4e4090ac56ea0d85ec482dd77bd7cc009b70 Mon Sep 17 00:00:00 2001
From: Bin Meng <bin.meng@windriver.com>
Date: Sat, 30 Oct 2021 11:06:06 +0800
Subject: [PATCH 60/76] target/riscv: machine: Sort the .subsections

Move the codes around so that the order of .subsections matches
the one they are referenced in vmstate_riscv_cpu.

Signed-off-by: Bin Meng <bin.meng@windriver.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-id: 20211030030606.32297-1-bmeng.cn@gmail.com
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 target/riscv/machine.c | 100 ++++++++++++++++++++---------------------
 1 file changed, 50 insertions(+), 50 deletions(-)

diff --git a/target/riscv/machine.c b/target/riscv/machine.c
index 7b4c739564..ad8248ebfd 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -76,56 +76,6 @@ static bool hyper_needed(void *opaque)
     return riscv_has_ext(env, RVH);
 }
 
-static bool vector_needed(void *opaque)
-{
-    RISCVCPU *cpu = opaque;
-    CPURISCVState *env = &cpu->env;
-
-    return riscv_has_ext(env, RVV);
-}
-
-static bool pointermasking_needed(void *opaque)
-{
-    RISCVCPU *cpu = opaque;
-    CPURISCVState *env = &cpu->env;
-
-    return riscv_has_ext(env, RVJ);
-}
-
-static const VMStateDescription vmstate_vector = {
-    .name = "cpu/vector",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = vector_needed,
-    .fields = (VMStateField[]) {
-            VMSTATE_UINT64_ARRAY(env.vreg, RISCVCPU, 32 * RV_VLEN_MAX / 64),
-            VMSTATE_UINTTL(env.vxrm, RISCVCPU),
-            VMSTATE_UINTTL(env.vxsat, RISCVCPU),
-            VMSTATE_UINTTL(env.vl, RISCVCPU),
-            VMSTATE_UINTTL(env.vstart, RISCVCPU),
-            VMSTATE_UINTTL(env.vtype, RISCVCPU),
-            VMSTATE_END_OF_LIST()
-        }
-};
-
-static const VMStateDescription vmstate_pointermasking = {
-    .name = "cpu/pointer_masking",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = pointermasking_needed,
-    .fields = (VMStateField[]) {
-        VMSTATE_UINTTL(env.mmte, RISCVCPU),
-        VMSTATE_UINTTL(env.mpmmask, RISCVCPU),
-        VMSTATE_UINTTL(env.mpmbase, RISCVCPU),
-        VMSTATE_UINTTL(env.spmmask, RISCVCPU),
-        VMSTATE_UINTTL(env.spmbase, RISCVCPU),
-        VMSTATE_UINTTL(env.upmmask, RISCVCPU),
-        VMSTATE_UINTTL(env.upmbase, RISCVCPU),
-
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 static const VMStateDescription vmstate_hyper = {
     .name = "cpu/hyper",
     .version_id = 1,
@@ -164,6 +114,56 @@ static const VMStateDescription vmstate_hyper = {
     }
 };
 
+static bool vector_needed(void *opaque)
+{
+    RISCVCPU *cpu = opaque;
+    CPURISCVState *env = &cpu->env;
+
+    return riscv_has_ext(env, RVV);
+}
+
+static const VMStateDescription vmstate_vector = {
+    .name = "cpu/vector",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = vector_needed,
+    .fields = (VMStateField[]) {
+            VMSTATE_UINT64_ARRAY(env.vreg, RISCVCPU, 32 * RV_VLEN_MAX / 64),
+            VMSTATE_UINTTL(env.vxrm, RISCVCPU),
+            VMSTATE_UINTTL(env.vxsat, RISCVCPU),
+            VMSTATE_UINTTL(env.vl, RISCVCPU),
+            VMSTATE_UINTTL(env.vstart, RISCVCPU),
+            VMSTATE_UINTTL(env.vtype, RISCVCPU),
+            VMSTATE_END_OF_LIST()
+        }
+};
+
+static bool pointermasking_needed(void *opaque)
+{
+    RISCVCPU *cpu = opaque;
+    CPURISCVState *env = &cpu->env;
+
+    return riscv_has_ext(env, RVJ);
+}
+
+static const VMStateDescription vmstate_pointermasking = {
+    .name = "cpu/pointer_masking",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = pointermasking_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINTTL(env.mmte, RISCVCPU),
+        VMSTATE_UINTTL(env.mpmmask, RISCVCPU),
+        VMSTATE_UINTTL(env.mpmbase, RISCVCPU),
+        VMSTATE_UINTTL(env.spmmask, RISCVCPU),
+        VMSTATE_UINTTL(env.spmbase, RISCVCPU),
+        VMSTATE_UINTTL(env.upmmask, RISCVCPU),
+        VMSTATE_UINTTL(env.upmbase, RISCVCPU),
+
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_riscv_cpu = {
     .name = "cpu",
     .version_id = 3,

From c94c239496256f1f1cb589825d052c2f3e26ebf6 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Tue, 16 Nov 2021 10:50:42 +0100
Subject: [PATCH 61/76] meson.build: Merge riscv32 and riscv64 cpu family
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In ba0e73336200, we merged riscv32 and riscv64 in configure.
However, meson does not treat them the same.  We need to merge
them here as well.

Fixes: ba0e73336200
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Message-id: 20211116095042.335224-1-richard.henderson@linaro.org
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 meson.build | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/meson.build b/meson.build
index 36540e0794..e2d38a43e6 100644
--- a/meson.build
+++ b/meson.build
@@ -59,6 +59,12 @@ supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64',
   'arm', 'aarch64', 'mips', 'mips64', 'sparc', 'sparc64']
 
 cpu = host_machine.cpu_family()
+
+# Unify riscv* to a single family.
+if cpu in ['riscv32', 'riscv64']
+  cpu = 'riscv'
+endif
+
 targetos = host_machine.system()
 
 if cpu in ['x86', 'x86_64']

From 418ce0201ff6f4c9d9560eaea15ff393c2cd7412 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 12 Nov 2021 08:22:20 +0100
Subject: [PATCH 62/76] Revert "device-crash-test: Ignore errors about a bus
 not being available"

This reverts commit ca89d15f8e42f2e5eac5bd200af38fdbfb32e875.

There is already an entry for this kind of messages earlier in the
ERROR_RULE_LIST - when I added this patch, I just got fooled by
the other errors that occur due to a race between QMP connection
and QEMU terminating early (which still spit out the 'No bus found'
messages in their backtrace), but these other problems have now
fortunately been tackled by John Snow, so we certainly don't need
this duplicated entry here anymore.

Message-Id: <20211112072220.108580-1-thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
 scripts/device-crash-test | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/device-crash-test b/scripts/device-crash-test
index 3db0ffe5b8..1c73dac93e 100755
--- a/scripts/device-crash-test
+++ b/scripts/device-crash-test
@@ -176,7 +176,6 @@ ERROR_RULE_LIST = [
     {'log':r"Multiple VT220 operator consoles are not supported"},
     {'log':r"core 0 already populated"},
     {'log':r"could not find stage1 bootloader"},
-    {'log':r"No '.*' bus found for device"},
 
     # other exitcode=1 failures not listed above will just generate INFO messages:
     {'exitcode':1, 'loglevel':logging.INFO},

From d06f3bf922679d89815fde9eac9264ba44e37954 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Tue, 16 Nov 2021 17:33:09 +0100
Subject: [PATCH 63/76] gitlab-ci/cirrus: Increase timeout to 80 minutes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The jobs on Cirrus-CI sometimes get delayed quite a bit, waiting to
be scheduled, so while the build test itself finishes within 60 minutes,
the total run time of the jobs can be longer due to this waiting time.
Thus let's increase the timeout on the gitlab side a little bit, so
that these jobs are not marked as failing just because of the delay.

Message-Id: <20211116163309.246602-1-thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Willian Rampazzo <willianr@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
 .gitlab-ci.d/cirrus.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml
index cc2f2e8906..d273a9e713 100644
--- a/.gitlab-ci.d/cirrus.yml
+++ b/.gitlab-ci.d/cirrus.yml
@@ -14,6 +14,7 @@
   stage: build
   image: registry.gitlab.com/libvirt/libvirt-ci/cirrus-run:master
   needs: []
+  timeout: 80m
   allow_failure: true
   script:
     - source .gitlab-ci.d/cirrus/$NAME.vars

From f3bc3a73c908df15966e66f88d5a633bd42fd029 Mon Sep 17 00:00:00 2001
From: Peng Liang <liangpeng10@huawei.com>
Date: Wed, 17 Nov 2021 09:47:39 +0800
Subject: [PATCH 64/76] vfio: Fix memory leak of hostwin

hostwin is allocated and added to hostwin_list in vfio_host_win_add, but
it is only deleted from hostwin_list in vfio_host_win_del, which causes
a memory leak.  Also, freeing all elements in hostwin_list is missing in
vfio_disconnect_container.

Fix: 2e4109de8e58 ("vfio/spapr: Create DMA window dynamically (SPAPR IOMMU v2)")
CC: qemu-stable@nongnu.org
Signed-off-by: Peng Liang <liangpeng10@huawei.com>
Link: https://lore.kernel.org/r/20211117014739.1839263-1-liangpeng10@huawei.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/common.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index dd387b0d39..080046e3f5 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -551,6 +551,7 @@ static int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova,
     QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
         if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
             QLIST_REMOVE(hostwin, hostwin_next);
+            g_free(hostwin);
             return 0;
         }
     }
@@ -2239,6 +2240,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
     if (QLIST_EMPTY(&container->group_list)) {
         VFIOAddressSpace *space = container->space;
         VFIOGuestIOMMU *giommu, *tmp;
+        VFIOHostDMAWindow *hostwin, *next;
 
         QLIST_REMOVE(container, next);
 
@@ -2249,6 +2251,12 @@ static void vfio_disconnect_container(VFIOGroup *group)
             g_free(giommu);
         }
 
+        QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
+                           next) {
+            QLIST_REMOVE(hostwin, hostwin_next);
+            g_free(hostwin);
+        }
+
         trace_vfio_disconnect_container(container->fd);
         close(container->fd);
         g_free(container);

From 55cdf566412695b4fc052065c7970632129cd65b Mon Sep 17 00:00:00 2001
From: Dov Murik <dovmurik@linux.ibm.com>
Date: Thu, 11 Nov 2021 10:00:43 +0000
Subject: [PATCH 65/76] qapi/qom,target/i386: sev-guest: Introduce
 kernel-hashes=on|off option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce new boolean 'kernel-hashes' option on the sev-guest object.
It will be used to to decide whether to add the hashes of
kernel/initrd/cmdline to SEV guest memory when booting with -kernel.
The default value is 'off'.

Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
Acked-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 qapi/qom.json     |  7 ++++++-
 qemu-options.hx   |  6 +++++-
 target/i386/sev.c | 20 ++++++++++++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/qapi/qom.json b/qapi/qom.json
index ccd1167808..eeb5395ff3 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -769,6 +769,10 @@
 # @reduced-phys-bits: number of bits in physical addresses that become
 #                     unavailable when SEV is enabled
 #
+# @kernel-hashes: if true, add hashes of kernel/initrd/cmdline to a
+#                 designated guest firmware page for measured boot
+#                 with -kernel (default: false) (since 6.2)
+#
 # Since: 2.12
 ##
 { 'struct': 'SevGuestProperties',
@@ -778,7 +782,8 @@
             '*policy': 'uint32',
             '*handle': 'uint32',
             '*cbitpos': 'uint32',
-            'reduced-phys-bits': 'uint32' } }
+            'reduced-phys-bits': 'uint32',
+            '*kernel-hashes': 'bool' } }
 
 ##
 # @ObjectType:
diff --git a/qemu-options.hx b/qemu-options.hx
index 7749f59300..ae2c6dbbfc 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -5189,7 +5189,7 @@ SRST
                  -object secret,id=sec0,keyid=secmaster0,format=base64,\\
                      data=$SECRET,iv=$(<iv.b64)
 
-    ``-object sev-guest,id=id,cbitpos=cbitpos,reduced-phys-bits=val,[sev-device=string,policy=policy,handle=handle,dh-cert-file=file,session-file=file]``
+    ``-object sev-guest,id=id,cbitpos=cbitpos,reduced-phys-bits=val,[sev-device=string,policy=policy,handle=handle,dh-cert-file=file,session-file=file,kernel-hashes=on|off]``
         Create a Secure Encrypted Virtualization (SEV) guest object,
         which can be used to provide the guest memory encryption support
         on AMD processors.
@@ -5229,6 +5229,10 @@ SRST
         session with the guest owner to negotiate keys used for
         attestation. The file must be encoded in base64.
 
+        The ``kernel-hashes`` adds the hashes of given kernel/initrd/
+        cmdline to a designated guest firmware page for measured Linux
+        boot with -kernel. The default is off. (Since 6.2)
+
         e.g to launch a SEV guest
 
         .. parsed-literal::
diff --git a/target/i386/sev.c b/target/i386/sev.c
index eede07f11d..cad32812f5 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -62,6 +62,7 @@ struct SevGuestState {
     char *session_file;
     uint32_t cbitpos;
     uint32_t reduced_phys_bits;
+    bool kernel_hashes;
 
     /* runtime state */
     uint32_t handle;
@@ -327,6 +328,20 @@ sev_guest_set_sev_device(Object *obj, const char *value, Error **errp)
     sev->sev_device = g_strdup(value);
 }
 
+static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp)
+{
+    SevGuestState *sev = SEV_GUEST(obj);
+
+    return sev->kernel_hashes;
+}
+
+static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp)
+{
+    SevGuestState *sev = SEV_GUEST(obj);
+
+    sev->kernel_hashes = value;
+}
+
 static void
 sev_guest_class_init(ObjectClass *oc, void *data)
 {
@@ -345,6 +360,11 @@ sev_guest_class_init(ObjectClass *oc, void *data)
                                   sev_guest_set_session_file);
     object_class_property_set_description(oc, "session-file",
             "guest owners session parameters (encoded with base64)");
+    object_class_property_add_bool(oc, "kernel-hashes",
+                                   sev_guest_get_kernel_hashes,
+                                   sev_guest_set_kernel_hashes);
+    object_class_property_set_description(oc, "kernel-hashes",
+            "add kernel hashes to guest firmware for measured Linux boot");
 }
 
 static void

From 9dbe0c93f00d3aef9ac386c390595d370cfad677 Mon Sep 17 00:00:00 2001
From: Dov Murik <dovmurik@linux.ibm.com>
Date: Thu, 11 Nov 2021 10:00:44 +0000
Subject: [PATCH 66/76] target/i386/sev: Add kernel hashes only if
 sev-guest.kernel-hashes=on
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit cff03145ed3c ("sev/i386: Introduce sev_add_kernel_loader_hashes
for measured linux boot", 2021-09-30) introduced measured direct boot
with -kernel, using an OVMF-designated hashes table which QEMU fills.

However, if OVMF doesn't designate such an area, QEMU would completely
abort the VM launch.  This breaks launching with -kernel using older
OVMF images which don't publish the SEV_HASH_TABLE_RV_GUID.

Fix that so QEMU will only look for the hashes table if the sev-guest
kernel-hashes option is set to on.  Otherwise, QEMU won't look for the
designated area in OVMF and won't fill that area.

To enable addition of kernel hashes, launch the guest with:

    -object sev-guest,...,kernel-hashes=on

Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
Reported-by: Tom Lendacky <thomas.lendacky@amd.com>
Acked-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 target/i386/sev.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index cad32812f5..e3abbeef68 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1223,6 +1223,14 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
     size_t hash_len = HASH_SIZE;
     int aligned_len;
 
+    /*
+     * Only add the kernel hashes if the sev-guest configuration explicitly
+     * stated kernel-hashes=on.
+     */
+    if (!sev_guest->kernel_hashes) {
+        return false;
+    }
+
     if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) {
         error_setg(errp, "SEV: kernel specified but OVMF has no hash table guid");
         return false;

From 5a0294a21c7677498bf40a447cc4a417d51a3cf4 Mon Sep 17 00:00:00 2001
From: Dov Murik <dovmurik@linux.ibm.com>
Date: Thu, 11 Nov 2021 10:00:45 +0000
Subject: [PATCH 67/76] target/i386/sev: Rephrase error message when no hashes
 table in guest firmware
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
Acked-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 target/i386/sev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index e3abbeef68..6ff196f7ad 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1232,7 +1232,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
     }
 
     if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) {
-        error_setg(errp, "SEV: kernel specified but OVMF has no hash table guid");
+        error_setg(errp, "SEV: kernel specified but guest firmware "
+                         "has no hashes table GUID");
         return false;
     }
     area = (SevHashTableDescriptor *)data;

From a0190bf15044d2410e84f9a12aeac9ed56bd58b0 Mon Sep 17 00:00:00 2001
From: Dov Murik <dovmurik@linux.ibm.com>
Date: Thu, 11 Nov 2021 10:00:46 +0000
Subject: [PATCH 68/76] target/i386/sev: Fail when invalid hashes table area
 detected
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit cff03145ed3c ("sev/i386: Introduce sev_add_kernel_loader_hashes
for measured linux boot", 2021-09-30) introduced measured direct boot
with -kernel, using an OVMF-designated hashes table which QEMU fills.

However, no checks are performed on the validity of the hashes area
designated by OVMF.  Specifically, if OVMF publishes the
SEV_HASH_TABLE_RV_GUID entry but it is filled with zeroes, this will
cause QEMU to write the hashes entries over the first page of the
guest's memory (GPA 0).

Add validity checks to the published area.  If the hashes table area's
base address is zero, or its size is too small to fit the aligned hashes
table, display an error and stop the guest launch.  In such case, the
following error will be displayed:

    qemu-system-x86_64: SEV: guest firmware hashes table area is invalid (base=0x0 size=0x0)

Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
Reported-by: Brijesh Singh <brijesh.singh@amd.com>
Acked-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 target/i386/sev.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 6ff196f7ad..d11b512361 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1221,7 +1221,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
     uint8_t kernel_hash[HASH_SIZE];
     uint8_t *hashp;
     size_t hash_len = HASH_SIZE;
-    int aligned_len;
+    int aligned_len = ROUND_UP(sizeof(SevHashTable), 16);
 
     /*
      * Only add the kernel hashes if the sev-guest configuration explicitly
@@ -1237,6 +1237,11 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
         return false;
     }
     area = (SevHashTableDescriptor *)data;
+    if (!area->base || area->size < aligned_len) {
+        error_setg(errp, "SEV: guest firmware hashes table area is invalid "
+                         "(base=0x%x size=0x%x)", area->base, area->size);
+        return false;
+    }
 
     /*
      * Calculate hash of kernel command-line with the terminating null byte. If
@@ -1295,7 +1300,6 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
     memcpy(ht->kernel.hash, kernel_hash, sizeof(ht->kernel.hash));
 
     /* When calling sev_encrypt_flash, the length has to be 16 byte aligned */
-    aligned_len = ROUND_UP(ht->len, 16);
     if (aligned_len != ht->len) {
         /* zero the excess data so the measurement can be reliably calculated */
         memset(ht->padding, 0, aligned_len - ht->len);

From ddcc0d898e4040b1795254fac8ac4f8514b0ecb8 Mon Sep 17 00:00:00 2001
From: Dov Murik <dovmurik@linux.ibm.com>
Date: Thu, 11 Nov 2021 10:00:47 +0000
Subject: [PATCH 69/76] target/i386/sev: Perform padding calculations at
 compile-time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In sev_add_kernel_loader_hashes, the sizes of structs are known at
compile-time, so calculate needed padding at compile-time.

No functional change intended.

Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Acked-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 target/i386/sev.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index d11b512361..4fd258a570 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -110,9 +110,19 @@ typedef struct QEMU_PACKED SevHashTable {
     SevHashTableEntry cmdline;
     SevHashTableEntry initrd;
     SevHashTableEntry kernel;
-    uint8_t padding[];
 } SevHashTable;
 
+/*
+ * Data encrypted by sev_encrypt_flash() must be padded to a multiple of
+ * 16 bytes.
+ */
+typedef struct QEMU_PACKED PaddedSevHashTable {
+    SevHashTable ht;
+    uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)];
+} PaddedSevHashTable;
+
+QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0);
+
 static SevGuestState *sev_guest;
 static Error *sev_mig_blocker;
 
@@ -1216,12 +1226,12 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
     uint8_t *data;
     SevHashTableDescriptor *area;
     SevHashTable *ht;
+    PaddedSevHashTable *padded_ht;
     uint8_t cmdline_hash[HASH_SIZE];
     uint8_t initrd_hash[HASH_SIZE];
     uint8_t kernel_hash[HASH_SIZE];
     uint8_t *hashp;
     size_t hash_len = HASH_SIZE;
-    int aligned_len = ROUND_UP(sizeof(SevHashTable), 16);
 
     /*
      * Only add the kernel hashes if the sev-guest configuration explicitly
@@ -1237,7 +1247,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
         return false;
     }
     area = (SevHashTableDescriptor *)data;
-    if (!area->base || area->size < aligned_len) {
+    if (!area->base || area->size < sizeof(PaddedSevHashTable)) {
         error_setg(errp, "SEV: guest firmware hashes table area is invalid "
                          "(base=0x%x size=0x%x)", area->base, area->size);
         return false;
@@ -1282,7 +1292,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
      * Populate the hashes table in the guest's memory at the OVMF-designated
      * area for the SEV hashes table
      */
-    ht = qemu_map_ram_ptr(NULL, area->base);
+    padded_ht = qemu_map_ram_ptr(NULL, area->base);
+    ht = &padded_ht->ht;
 
     ht->guid = sev_hash_table_header_guid;
     ht->len = sizeof(*ht);
@@ -1299,13 +1310,10 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
     ht->kernel.len = sizeof(ht->kernel);
     memcpy(ht->kernel.hash, kernel_hash, sizeof(ht->kernel.hash));
 
-    /* When calling sev_encrypt_flash, the length has to be 16 byte aligned */
-    if (aligned_len != ht->len) {
-        /* zero the excess data so the measurement can be reliably calculated */
-        memset(ht->padding, 0, aligned_len - ht->len);
-    }
+    /* zero the excess data so the measurement can be reliably calculated */
+    memset(padded_ht->padding, 0, sizeof(padded_ht->padding));
 
-    if (sev_encrypt_flash((uint8_t *)ht, aligned_len, errp) < 0) {
+    if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) {
         return false;
     }
 

From 58603ba2680fa35eade630e4b040e96953a11021 Mon Sep 17 00:00:00 2001
From: Dov Murik <dovmurik@linux.ibm.com>
Date: Thu, 11 Nov 2021 10:00:48 +0000
Subject: [PATCH 70/76] target/i386/sev: Replace qemu_map_ram_ptr with
 address_space_map
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use address_space_map/unmap and check for errors.

Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
Acked-by: Brijesh Singh <brijesh.singh@amd.com>
[Two lines wrapped for length - Daniel]
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 target/i386/sev.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 4fd258a570..025ff7a6f8 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -37,6 +37,7 @@
 #include "qapi/qmp/qerror.h"
 #include "exec/confidential-guest-support.h"
 #include "hw/i386/pc.h"
+#include "exec/address-spaces.h"
 
 #define TYPE_SEV_GUEST "sev-guest"
 OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST)
@@ -1232,6 +1233,9 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
     uint8_t kernel_hash[HASH_SIZE];
     uint8_t *hashp;
     size_t hash_len = HASH_SIZE;
+    hwaddr mapped_len = sizeof(*padded_ht);
+    MemTxAttrs attrs = { 0 };
+    bool ret = true;
 
     /*
      * Only add the kernel hashes if the sev-guest configuration explicitly
@@ -1292,7 +1296,12 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
      * Populate the hashes table in the guest's memory at the OVMF-designated
      * area for the SEV hashes table
      */
-    padded_ht = qemu_map_ram_ptr(NULL, area->base);
+    padded_ht = address_space_map(&address_space_memory, area->base,
+                                  &mapped_len, true, attrs);
+    if (!padded_ht || mapped_len != sizeof(*padded_ht)) {
+        error_setg(errp, "SEV: cannot map hashes table guest memory area");
+        return false;
+    }
     ht = &padded_ht->ht;
 
     ht->guid = sev_hash_table_header_guid;
@@ -1314,10 +1323,13 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
     memset(padded_ht->padding, 0, sizeof(padded_ht->padding));
 
     if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) {
-        return false;
+        ret = false;
     }
 
-    return true;
+    address_space_unmap(&address_space_memory, padded_ht,
+                        mapped_len, true, mapped_len);
+
+    return ret;
 }
 
 static void

From d05dcd94aee88728facafb993c7280547eb4d645 Mon Sep 17 00:00:00 2001
From: Prasad J Pandit <pjp@fedoraproject.org>
Date: Sat, 30 Jan 2021 18:46:52 +0530
Subject: [PATCH 71/76] net: vmxnet3: validate configuration values during
 activate (CVE-2021-20203)

While activating device in vmxnet3_acticate_device(), it does not
validate guest supplied configuration values against predefined
minimum - maximum limits. This may lead to integer overflow or
OOB access issues. Add checks to avoid it.

Fixes: CVE-2021-20203
Buglink: https://bugs.launchpad.net/qemu/+bug/1913873
Reported-by: Gaoning Pan <pgn@zju.edu.cn>
Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/vmxnet3.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 41f796a247..f65af4e9ef 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1441,6 +1441,7 @@ static void vmxnet3_activate_device(VMXNET3State *s)
     vmxnet3_setup_rx_filtering(s);
     /* Cache fields from shared memory */
     s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu);
+    assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU);
     VMW_CFPRN("MTU is %u", s->mtu);
 
     s->max_rx_frags =
@@ -1486,6 +1487,9 @@ static void vmxnet3_activate_device(VMXNET3State *s)
         /* Read rings memory locations for TX queues */
         pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA);
         size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize);
+        if (size > VMXNET3_TX_RING_MAX_SIZE) {
+            size = VMXNET3_TX_RING_MAX_SIZE;
+        }
 
         vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size,
                           sizeof(struct Vmxnet3_TxDesc), false);
@@ -1496,6 +1500,9 @@ static void vmxnet3_activate_device(VMXNET3State *s)
         /* TXC ring */
         pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA);
         size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize);
+        if (size > VMXNET3_TC_RING_MAX_SIZE) {
+            size = VMXNET3_TC_RING_MAX_SIZE;
+        }
         vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size,
                           sizeof(struct Vmxnet3_TxCompDesc), true);
         VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring);
@@ -1537,6 +1544,9 @@ static void vmxnet3_activate_device(VMXNET3State *s)
             /* RX rings */
             pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]);
             size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]);
+            if (size > VMXNET3_RX_RING_MAX_SIZE) {
+                size = VMXNET3_RX_RING_MAX_SIZE;
+            }
             vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size,
                               sizeof(struct Vmxnet3_RxDesc), false);
             VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
@@ -1546,6 +1556,9 @@ static void vmxnet3_activate_device(VMXNET3State *s)
         /* RXC ring */
         pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA);
         size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize);
+        if (size > VMXNET3_RC_RING_MAX_SIZE) {
+            size = VMXNET3_RC_RING_MAX_SIZE;
+        }
         vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size,
                           sizeof(struct Vmxnet3_RxCompDesc), true);
         VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);

From fb5eca4a571e303aafac7130abd66adc184aae72 Mon Sep 17 00:00:00 2001
From: Zhang Chen <chen.zhang@intel.com>
Date: Thu, 18 Nov 2021 11:20:10 +0800
Subject: [PATCH 72/76] net/colo-compare.c: Fix ACK track reverse issue

The TCP protocol ACK maybe bigger than uint32_t MAX.
At this time, the ACK will reverse to 0. This patch
fix the max_ack and min_ack track issue.

Signed-off-by: Zhang Chen <chen.zhang@intel.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 net/colo-compare.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index b8876d7fd9..1225f40e41 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -209,7 +209,8 @@ static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
 
     pkt->tcp_seq = ntohl(tcphd->th_seq);
     pkt->tcp_ack = ntohl(tcphd->th_ack);
-    *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack;
+    /* Need to consider ACK will bigger than uint32_t MAX */
+    *max_ack = pkt->tcp_ack - *max_ack > 0 ? pkt->tcp_ack : *max_ack;
     pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data
                        + (tcphd->th_off << 2);
     pkt->payload_size = pkt->size - pkt->header_size;
@@ -413,7 +414,8 @@ static void colo_compare_tcp(CompareState *s, Connection *conn)
      * can ensure that the packet's payload is acknowledged by
      * primary and secondary.
     */
-    uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack;
+    uint32_t min_ack = conn->pack - conn->sack > 0 ?
+                       conn->sack : conn->pack;
 
 pri:
     if (g_queue_is_empty(&conn->primary_list)) {

From 0656fbc7ddccdade1709742a9b56ae07dd3c280a Mon Sep 17 00:00:00 2001
From: Zhang Chen <chen.zhang@intel.com>
Date: Thu, 18 Nov 2021 11:20:11 +0800
Subject: [PATCH 73/76] net/colo-compare.c: Fix incorrect return when input
 wrong size

Signed-off-by: Zhang Chen <chen.zhang@intel.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 net/colo-compare.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 1225f40e41..b966e7e514 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -807,7 +807,7 @@ static int compare_chr_send(CompareState *s,
     }
 
     if (!size) {
-        return 0;
+        return -1;
     }
 
     entry = g_slice_new(SendEntry);

From 9fc6e86e8b69e2e672cbb9c25cddc3b9deb96afb Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Thu, 9 Sep 2021 11:43:08 +0200
Subject: [PATCH 74/76] hw/nvme: reattach subsystem namespaces on hotplug

With commit 5ffbaeed16 ("hw/nvme: fix controller hot unplugging")
namespaces get moved from the controller to the subsystem if one
is specified.
That keeps the namespaces alive after a controller hot-unplug, but
after a controller hotplug we have to reconnect the namespaces
from the subsystem to the controller.

Fixes: 5ffbaeed16 ("hw/nvme: fix controller hot unplugging")
Cc: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Hannes Reinecke <hare@suse.de>
[k.jensen: only attach to shared and non-detached namespaces]
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/nvme/subsys.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 495dcff5eb..fb58d63950 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -14,7 +14,7 @@
 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 {
     NvmeSubsystem *subsys = n->subsys;
-    int cntlid;
+    int cntlid, nsid;
 
     for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
         if (!subsys->ctrls[cntlid]) {
@@ -29,12 +29,20 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 
     subsys->ctrls[cntlid] = n;
 
+    for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) {
+        NvmeNamespace *ns = subsys->namespaces[nsid];
+        if (ns && ns->params.shared && !ns->params.detached) {
+            nvme_attach_ns(n, ns);
+        }
+    }
+
     return cntlid;
 }
 
 void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n)
 {
     subsys->ctrls[n->cntlid] = NULL;
+    n->cntlid = -1;
 }
 
 static void nvme_subsys_setup(NvmeSubsystem *subsys)

From 916b0f0b5264759c581badfdc0e1c5f98362dbda Mon Sep 17 00:00:00 2001
From: Klaus Jensen <k.jensen@samsung.com>
Date: Fri, 24 Sep 2021 08:52:22 +0200
Subject: [PATCH 75/76] hw/nvme: change nvme-ns 'shared' default

Change namespaces to be shared namespaces by default (parameter
shared=on). Keep shared=off for older machine types.

Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 docs/system/devices/nvme.rst | 24 ++++++++++++++----------
 hw/core/machine.c            |  1 +
 hw/nvme/ns.c                 |  8 +-------
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
index bff72d1c24..a1c0db01f6 100644
--- a/docs/system/devices/nvme.rst
+++ b/docs/system/devices/nvme.rst
@@ -110,28 +110,32 @@ multipath I/O.
 This will create an NVM subsystem with two controllers. Having controllers
 linked to an ``nvme-subsys`` device allows additional ``nvme-ns`` parameters:
 
-``shared`` (default: ``off``)
+``shared`` (default: ``on`` since 6.2)
   Specifies that the namespace will be attached to all controllers in the
-  subsystem. If set to ``off`` (the default), the namespace will remain a
-  private namespace and may only be attached to a single controller at a time.
+  subsystem. If set to ``off``, the namespace will remain a private namespace
+  and may only be attached to a single controller at a time. Shared namespaces
+  are always automatically attached to all controllers (also when controllers
+  are hotplugged).
 
 ``detached`` (default: ``off``)
   If set to ``on``, the namespace will be be available in the subsystem, but
-  not attached to any controllers initially.
+  not attached to any controllers initially. A shared namespace with this set
+  to ``on`` will never be automatically attached to controllers.
 
 Thus, adding
 
 .. code-block:: console
 
    -drive file=nvm-1.img,if=none,id=nvm-1
-   -device nvme-ns,drive=nvm-1,nsid=1,shared=on
+   -device nvme-ns,drive=nvm-1,nsid=1
    -drive file=nvm-2.img,if=none,id=nvm-2
-   -device nvme-ns,drive=nvm-2,nsid=3,detached=on
+   -device nvme-ns,drive=nvm-2,nsid=3,shared=off,detached=on
 
-will cause NSID 1 will be a shared namespace (due to ``shared=on``) that is
-initially attached to both controllers. NSID 3 will be a private namespace
-(i.e. only attachable to a single controller at a time) and will not be
-attached to any controller initially (due to ``detached=on``).
+will cause NSID 1 will be a shared namespace that is initially attached to both
+controllers. NSID 3 will be a private namespace due to ``shared=off`` and only
+attachable to a single controller at a time. Additionally it will not be
+attached to any controller initially (due to ``detached=on``) or to hotplugged
+controllers.
 
 Optional Features
 =================
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 26ec54e726..53a99abc56 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -39,6 +39,7 @@
 
 GlobalProperty hw_compat_6_1[] = {
     { "vhost-user-vsock-device", "seqpacket", "off" },
+    { "nvme-ns", "shared", "off" },
 };
 const size_t hw_compat_6_1_len = G_N_ELEMENTS(hw_compat_6_1);
 
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index b7cf1494e7..8b5f98c761 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -465,12 +465,6 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
                        "linked to an nvme-subsys device");
             return;
         }
-
-        if (ns->params.shared) {
-            error_setg(errp, "shared requires that the nvme device is "
-                       "linked to an nvme-subsys device");
-            return;
-        }
     } else {
         /*
          * If this namespace belongs to a subsystem (through a link on the
@@ -532,7 +526,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 static Property nvme_ns_props[] = {
     DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
     DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false),
-    DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, false),
+    DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true),
     DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
     DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
     DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0),

From e2c57529c9306e4c9aac75d9879f6e7699584a22 Mon Sep 17 00:00:00 2001
From: Klaus Jensen <k.jensen@samsung.com>
Date: Wed, 17 Nov 2021 14:12:56 +0100
Subject: [PATCH 76/76] hw/nvme: fix buffer overrun in nvme_changed_nslist
 (CVE-2021-3947)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix missing offset verification.

Cc: qemu-stable@nongnu.org
Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
Reported-by: Qiuhao Li <Qiuhao.Li@outlook.com>
Fixes: f432fdfa121 ("support changed namespace asynchronous event")
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/nvme/ctrl.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 6a571d18cf..5f573c417b 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4168,6 +4168,11 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
     int i = 0;
     uint32_t nsid;
 
+    if (off >= sizeof(nslist)) {
+        trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(nslist));
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
     memset(nslist, 0x0, sizeof(nslist));
     trans_len = MIN(sizeof(nslist) - off, buf_len);