From 024d046bf41b5256adec671085bcee767a6da125 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 24 Jul 2024 06:48:59 -0400 Subject: [PATCH 01/19] virtio-rng: block max-bytes=0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit with max-bytes set to 0, quota is 0 and so device does not work. block this to avoid user confusion Message-Id: <73a89a42d82ec8b47358f25119b87063e4a6ea57.1721818306.git.mst@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé --- hw/virtio/virtio-rng.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index f74efffef7..7cf31da071 100644 --- a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -184,8 +184,9 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) /* Workaround: Property parsing does not enforce unsigned integers, * So this is a hack to reject such numbers. */ - if (vrng->conf.max_bytes > INT64_MAX) { - error_setg(errp, "'max-bytes' parameter must be non-negative, " + if (vrng->conf.max_bytes == 0 || + vrng->conf.max_bytes > INT64_MAX) { + error_setg(errp, "'max-bytes' parameter must be positive, " "and less than 2^63"); return; } From e57030e8dc071424b0fc35c0e0cf2898e53d5e81 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:16 -0400 Subject: [PATCH 02/19] Revert "docs: Document composable SR-IOV device" This reverts commit d6f40c95b35bd380340b698e4306704fe22a5d68. Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 1 - docs/system/index.rst | 1 - docs/system/sriov.rst | 36 ------------------------------------ 3 files changed, 38 deletions(-) delete mode 100644 docs/system/sriov.rst diff --git a/MAINTAINERS b/MAINTAINERS index 72b3c67360..e34c2bd4cd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2011,7 +2011,6 @@ F: hw/pci-bridge/* F: qapi/pci.json F: docs/pci* F: docs/specs/*pci* -F: docs/system/sriov.rst PCIE DOE M: Huai-Cheng Kuo diff --git a/docs/system/index.rst b/docs/system/index.rst index 718e9d3c56..c21065e519 100644 --- a/docs/system/index.rst +++ b/docs/system/index.rst @@ -39,4 +39,3 @@ or Hypervisor.Framework. multi-process confidential-guest-support vm-templating - sriov diff --git a/docs/system/sriov.rst b/docs/system/sriov.rst deleted file mode 100644 index a851a66a4b..0000000000 --- a/docs/system/sriov.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0-or-later - -Compsable SR-IOV device -======================= - -SR-IOV (Single Root I/O Virtualization) is an optional extended capability of a -PCI Express device. It allows a single physical function (PF) to appear as -multiple virtual functions (VFs) for the main purpose of eliminating software -overhead in I/O from virtual machines. - -There are devices with predefined SR-IOV configurations, but it is also possible -to compose an SR-IOV device yourself. Composing an SR-IOV device is currently -only supported by virtio-net-pci. - -Users can configure an SR-IOV-capable virtio-net device by adding -virtio-net-pci functions to a bus. Below is a command line example: - -.. code-block:: shell - - -netdev user,id=n -netdev user,id=o - -netdev user,id=p -netdev user,id=q - -device pcie-root-port,id=b - -device virtio-net-pci,bus=b,addr=0x0.0x3,netdev=q,sriov-pf=f - -device virtio-net-pci,bus=b,addr=0x0.0x2,netdev=p,sriov-pf=f - -device virtio-net-pci,bus=b,addr=0x0.0x1,netdev=o,sriov-pf=f - -device virtio-net-pci,bus=b,addr=0x0.0x0,netdev=n,id=f - -The VFs specify the paired PF with ``sriov-pf`` property. The PF must be -added after all VFs. It is the user's responsibility to ensure that VFs have -function numbers larger than one of the PF, and that the function numbers -have a consistent stride. - -You may also need to perform additional steps to activate the SR-IOV feature on -your guest. For Linux, refer to [1]_. - -.. [1] https://docs.kernel.org/PCI/pci-iov-howto.html From cc91ac0a7240dea713acc802d0c304163a1c15c6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:17 -0400 Subject: [PATCH 03/19] Revert "virtio-net: Implement SR-IOV VF" This reverts commit c2d6db6a1f39780b24538440091893f9fbe060a7. Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-net-pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c index dba4987d6e..e03543a70a 100644 --- a/hw/virtio/virtio-net-pci.c +++ b/hw/virtio/virtio-net-pci.c @@ -75,7 +75,6 @@ static void virtio_net_pci_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_VIRTIO_NET; k->revision = VIRTIO_PCI_ABI_VERSION; k->class_id = PCI_CLASS_NETWORK_ETHERNET; - k->sriov_vf_user_creatable = true; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); device_class_set_props(dc, virtio_net_properties); vpciklass->realize = virtio_net_pci_realize; From 67f5b279fc72e43ccdee20a1a1e54cb51e24f06a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:18 -0400 Subject: [PATCH 04/19] Revert "virtio-pci: Implement SR-IOV PF" This reverts commit 3f868ffb0bae0c4feafabe34a371cded57fe3806. Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 20 +++++--------------- include/hw/virtio/virtio-pci.h | 1 - 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 0c8fcc5627..9534730bba 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1955,7 +1955,6 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) uint8_t *config; uint32_t size; VirtIODevice *vdev = virtio_bus_get_device(bus); - int16_t res; /* * Virtio capabilities present without @@ -2101,14 +2100,6 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar); } - - res = pcie_sriov_pf_init_from_user_created_vfs(&proxy->pci_dev, - proxy->last_pcie_cap_offset, - errp); - if (res > 0) { - proxy->last_pcie_cap_offset += res; - virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV); - } } static void virtio_pci_device_unplugged(DeviceState *d) @@ -2196,7 +2187,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) if (pcie_port && pci_is_express(pci_dev)) { int pos; - proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; + uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; pos = pcie_endpoint_cap_init(pci_dev, 0); assert(pos > 0); @@ -2216,9 +2207,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3); if (proxy->flags & VIRTIO_PCI_FLAG_AER) { - pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset, + pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset, PCI_ERR_SIZEOF, NULL); - proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF; + last_pcie_cap_offset += PCI_ERR_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) { @@ -2243,9 +2234,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) } if (proxy->flags & VIRTIO_PCI_FLAG_ATS) { - pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset, + pcie_ats_init(pci_dev, last_pcie_cap_offset, proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED); - proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; + last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) { @@ -2272,7 +2263,6 @@ static void virtio_pci_exit(PCIDevice *pci_dev) bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) && !pci_bus_is_root(pci_get_bus(pci_dev)); - pcie_sriov_pf_exit(&proxy->pci_dev); msix_uninit_exclusive_bar(pci_dev); if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port && pci_is_express(pci_dev)) { diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 34539f2f67..9e67ba38c7 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -152,7 +152,6 @@ struct VirtIOPCIProxy { uint32_t modern_io_bar_idx; uint32_t modern_mem_bar_idx; int config_cap; - uint16_t last_pcie_cap_offset; uint32_t flags; bool disable_modern; bool ignore_backend_features; From aa01c4914ed6f6c087ee172483e22a515c4cc66a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:19 -0400 Subject: [PATCH 05/19] Revert "pcie_sriov: Allow user to create SR-IOV device" This reverts commit 122173a5830f7757f8a94a3b1559582f312e140b. Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 62 +++----- hw/pci/pcie_sriov.c | 290 ++++++++---------------------------- include/hw/pci/pci_device.h | 6 +- include/hw/pci/pcie_sriov.h | 18 --- 4 files changed, 83 insertions(+), 293 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 8ad5d7e2d8..cf2794879d 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -85,7 +85,6 @@ static Property pci_props[] = { QEMU_PCIE_ERR_UNC_MASK_BITNR, true), DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present, QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), - DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf), DEFINE_PROP_END_OF_LIST() }; @@ -960,8 +959,13 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; } - /* SR/IOV is not handled here. */ - if (pci_is_vf(dev)) { + /* + * With SR/IOV and ARI, a device at function 0 need not be a multifunction + * device, as it may just be a VF that ended up with function 0 in + * the legacy PCI interpretation. Avoid failing in such cases: + */ + if (pci_is_vf(dev) && + dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { return; } @@ -994,8 +998,7 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) } /* function 0 indicates single function, so function > 0 must be NULL */ for (func = 1; func < PCI_FUNC_MAX; ++func) { - PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)]; - if (device && !pci_is_vf(device)) { + if (bus->devices[PCI_DEVFN(slot, func)]) { error_setg(errp, "PCI: %x.0 indicates single function, " "but %x.%x is already populated.", slot, slot, func); @@ -1280,7 +1283,6 @@ static void pci_qdev_unrealize(DeviceState *dev) pci_unregister_io_regions(pci_dev); pci_del_option_rom(pci_dev); - pcie_sriov_unregister_device(pci_dev); if (pc->exit) { pc->exit(pci_dev); @@ -1312,6 +1314,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size = memory_region_size(memory); uint8_t hdr_type; + assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */ assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); assert(is_power_of_2(size)); @@ -1322,6 +1325,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2); r = &pci_dev->io_regions[region_num]; + r->addr = PCI_BAR_UNMAPPED; r->size = size; r->type = type; r->memory = memory; @@ -1329,35 +1333,22 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, ? pci_get_bus(pci_dev)->address_space_io : pci_get_bus(pci_dev)->address_space_mem; - if (pci_is_vf(pci_dev)) { - PCIDevice *pf = pci_dev->exp.sriov_vf.pf; - assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]); + wmask = ~(size - 1); + if (region_num == PCI_ROM_SLOT) { + /* ROM enable bit is writable */ + wmask |= PCI_ROM_ADDRESS_ENABLE; + } - r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size); - if (r->addr != PCI_BAR_UNMAPPED) { - memory_region_add_subregion_overlap(r->address_space, - r->addr, r->memory, 1); - } + addr = pci_bar(pci_dev, region_num); + pci_set_long(pci_dev->config + addr, type); + + if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && + r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(pci_dev->wmask + addr, wmask); + pci_set_quad(pci_dev->cmask + addr, ~0ULL); } else { - r->addr = PCI_BAR_UNMAPPED; - - wmask = ~(size - 1); - if (region_num == PCI_ROM_SLOT) { - /* ROM enable bit is writable */ - wmask |= PCI_ROM_ADDRESS_ENABLE; - } - - addr = pci_bar(pci_dev, region_num); - pci_set_long(pci_dev->config + addr, type); - - if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) && - r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { - pci_set_quad(pci_dev->wmask + addr, wmask); - pci_set_quad(pci_dev->cmask + addr, ~0ULL); - } else { - pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); - pci_set_long(pci_dev->cmask + addr, 0xffffffff); - } + pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff); + pci_set_long(pci_dev->cmask + addr, 0xffffffff); } } @@ -2118,11 +2109,6 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } } - if (!pcie_sriov_register_device(pci_dev, errp)) { - pci_qdev_unrealize(DEVICE(pci_dev)); - return; - } - /* * A PCIe Downstream Port that do not have ARI Forwarding enabled must * associate only Device 0 with the device attached to the bus diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 0fc9f810b9..15a4aac1f4 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -20,8 +20,6 @@ #include "qapi/error.h" #include "trace.h" -static GHashTable *pfs; - static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) { for (uint16_t i = 0; i < total_vfs; i++) { @@ -33,49 +31,14 @@ static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) dev->exp.sriov_pf.vf = NULL; } -static void clear_ctrl_vfe(PCIDevice *dev) -{ - uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; - pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); -} - -static void register_vfs(PCIDevice *dev) -{ - uint16_t num_vfs; - uint16_t i; - uint16_t sriov_cap = dev->exp.sriov_cap; - - assert(sriov_cap > 0); - num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); - if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { - clear_ctrl_vfe(dev); - return; - } - - trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), num_vfs); - for (i = 0; i < num_vfs; i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], true); - } -} - -static void unregister_vfs(PCIDevice *dev) -{ - uint16_t i; - uint8_t *cfg = dev->config + dev->exp.sriov_cap; - - trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn)); - for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], false); - } -} - -static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset, - uint16_t vf_dev_id, uint16_t init_vfs, - uint16_t total_vfs, uint16_t vf_offset, - uint16_t vf_stride, Error **errp) +bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, + const char *vfname, uint16_t vf_dev_id, + uint16_t init_vfs, uint16_t total_vfs, + uint16_t vf_offset, uint16_t vf_stride, + Error **errp) { + BusState *bus = qdev_get_parent_bus(&dev->qdev); + int32_t devfn = dev->devfn + vf_offset; uint8_t *cfg = dev->config + offset; uint8_t *wmask; @@ -137,28 +100,6 @@ static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset, qdev_prop_set_bit(&dev->qdev, "multifunction", true); - return true; -} - -bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, - const char *vfname, uint16_t vf_dev_id, - uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride, - Error **errp) -{ - BusState *bus = qdev_get_parent_bus(&dev->qdev); - int32_t devfn = dev->devfn + vf_offset; - - if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) { - error_setg(errp, "attaching user-created SR-IOV VF unsupported"); - return false; - } - - if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs, - total_vfs, vf_offset, vf_stride, errp)) { - return false; - } - dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); for (uint16_t i = 0; i < total_vfs; i++) { @@ -188,24 +129,7 @@ void pcie_sriov_pf_exit(PCIDevice *dev) { uint8_t *cfg = dev->config + dev->exp.sriov_cap; - if (dev->exp.sriov_pf.vf_user_created) { - uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); - uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF); - uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID); - - unregister_vfs(dev); - - for (uint16_t i = 0; i < total_vfs; i++) { - PCIDevice *vf = dev->exp.sriov_pf.vf[i]; - - vf->exp.sriov_vf.pf = NULL; - - pci_config_set_vendor_id(vf->config, ven_id); - pci_config_set_device_id(vf->config, vf_dev_id); - } - } else { - unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); - } + unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); } void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, @@ -238,172 +162,74 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, MemoryRegion *memory) { + PCIIORegion *r; + PCIBus *bus = pci_get_bus(dev); uint8_t type; + pcibus_t size = memory_region_size(memory); - assert(dev->exp.sriov_vf.pf); + assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */ + assert(region_num >= 0); + assert(region_num < PCI_NUM_REGIONS); type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; - return pci_register_bar(dev, region_num, type, memory); + if (!is_power_of_2(size)) { + error_report("%s: PCI region size must be a power" + " of two - type=0x%x, size=0x%"FMT_PCIBUS, + __func__, type, size); + exit(1); + } + + r = &dev->io_regions[region_num]; + r->memory = memory; + r->address_space = + type & PCI_BASE_ADDRESS_SPACE_IO + ? bus->address_space_io + : bus->address_space_mem; + r->size = size; + r->type = type; + + r->addr = pci_bar_address(dev, region_num, r->type, r->size); + if (r->addr != PCI_BAR_UNMAPPED) { + memory_region_add_subregion_overlap(r->address_space, + r->addr, r->memory, 1); + } } -static gint compare_vf_devfns(gconstpointer a, gconstpointer b) +static void clear_ctrl_vfe(PCIDevice *dev) { - return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn; + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); } -int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, - uint16_t offset, - Error **errp) +static void register_vfs(PCIDevice *dev) { - GPtrArray *pf; - PCIDevice **vfs; - BusState *bus = qdev_get_parent_bus(DEVICE(dev)); - uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID); - uint16_t vf_dev_id; - uint16_t vf_offset; - uint16_t vf_stride; + uint16_t num_vfs; uint16_t i; + uint16_t sriov_cap = dev->exp.sriov_cap; - if (!pfs || !dev->qdev.id) { - return 0; + assert(sriov_cap > 0); + num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); + if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { + clear_ctrl_vfe(dev); + return; } - pf = g_hash_table_lookup(pfs, dev->qdev.id); - if (!pf) { - return 0; + trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), num_vfs); + for (i = 0; i < num_vfs; i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], true); } - - if (pf->len > UINT16_MAX) { - error_setg(errp, "too many VFs"); - return -1; - } - - g_ptr_array_sort(pf, compare_vf_devfns); - vfs = (void *)pf->pdata; - - if (vfs[0]->devfn <= dev->devfn) { - error_setg(errp, "a VF function number is less than the PF function number"); - return -1; - } - - vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID); - vf_offset = vfs[0]->devfn - dev->devfn; - vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn; - - for (i = 0; i < pf->len; i++) { - if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) { - error_setg(errp, "SR-IOV VF parent bus mismatches with PF"); - return -1; - } - - if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) { - error_setg(errp, "SR-IOV VF vendor ID mismatches with PF"); - return -1; - } - - if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) { - error_setg(errp, "inconsistent SR-IOV VF device IDs"); - return -1; - } - - for (size_t j = 0; j < PCI_NUM_REGIONS; j++) { - if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size || - vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) { - error_setg(errp, "inconsistent SR-IOV BARs"); - return -1; - } - } - - if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) { - error_setg(errp, "inconsistent SR-IOV stride"); - return -1; - } - } - - if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len, - pf->len, vf_offset, vf_stride, errp)) { - return -1; - } - - for (i = 0; i < pf->len; i++) { - vfs[i]->exp.sriov_vf.pf = dev; - vfs[i]->exp.sriov_vf.vf_number = i; - - /* set vid/did according to sr/iov spec - they are not used */ - pci_config_set_vendor_id(vfs[i]->config, 0xffff); - pci_config_set_device_id(vfs[i]->config, 0xffff); - } - - dev->exp.sriov_pf.vf = vfs; - dev->exp.sriov_pf.vf_user_created = true; - - for (i = 0; i < PCI_NUM_REGIONS; i++) { - PCIIORegion *region = &vfs[0]->io_regions[i]; - - if (region->size) { - pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size); - } - } - - return PCI_EXT_CAP_SRIOV_SIZEOF; } -bool pcie_sriov_register_device(PCIDevice *dev, Error **errp) +static void unregister_vfs(PCIDevice *dev) { - if (!dev->exp.sriov_pf.vf && dev->qdev.id && - pfs && g_hash_table_contains(pfs, dev->qdev.id)) { - error_setg(errp, "attaching user-created SR-IOV VF unsupported"); - return false; - } + uint16_t i; + uint8_t *cfg = dev->config + dev->exp.sriov_cap; - if (dev->sriov_pf) { - PCIDevice *pci_pf; - GPtrArray *pf; - - if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) { - error_setg(errp, "user cannot create SR-IOV VF with this device type"); - return false; - } - - if (!pci_is_express(dev)) { - error_setg(errp, "PCI Express is required for SR-IOV VF"); - return false; - } - - if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) { - error_setg(errp, "PCI device specified as SR-IOV PF already exists"); - return false; - } - - if (!pfs) { - pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); - } - - pf = g_hash_table_lookup(pfs, dev->sriov_pf); - if (!pf) { - pf = g_ptr_array_new(); - g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf); - } - - g_ptr_array_add(pf, dev); - } - - return true; -} - -void pcie_sriov_unregister_device(PCIDevice *dev) -{ - if (dev->sriov_pf && pfs) { - GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf); - - if (pf) { - g_ptr_array_remove_fast(pf, dev); - - if (!pf->len) { - g_hash_table_remove(pfs, dev->sriov_pf); - g_ptr_array_free(pf, FALSE); - } - } + trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { + pci_set_enabled(dev->exp.sriov_pf.vf[i], false); } } @@ -490,7 +316,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize) uint16_t pcie_sriov_vf_number(PCIDevice *dev) { - assert(dev->exp.sriov_vf.pf); + assert(pci_is_vf(dev)); return dev->exp.sriov_vf.vf_number; } diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index e7e41cb939..1ff3ce94e2 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -38,8 +38,6 @@ struct PCIDeviceClass { uint16_t subsystem_id; /* only for header type = 0 */ const char *romfile; /* rom bar */ - - bool sriov_vf_user_creatable; }; enum PCIReqIDType { @@ -169,8 +167,6 @@ struct PCIDevice { /* ID of standby device in net_failover pair */ char *failover_pair_id; uint32_t acpi_index; - - char *sriov_pf; }; static inline int pci_intx(PCIDevice *pci_dev) @@ -203,7 +199,7 @@ static inline int pci_is_express_downstream_port(const PCIDevice *d) static inline int pci_is_vf(const PCIDevice *d) { - return d->sriov_pf || d->exp.sriov_vf.pf != NULL; + return d->exp.sriov_vf.pf != NULL; } static inline uint32_t pci_config_size(const PCIDevice *d) diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index f75b8f22ee..c5d2d318d3 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -18,7 +18,6 @@ typedef struct PCIESriovPF { uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ - bool vf_user_created; /* If VFs are created by user */ } PCIESriovPF; typedef struct PCIESriovVF { @@ -41,23 +40,6 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, MemoryRegion *memory); -/** - * pcie_sriov_pf_init_from_user_created_vfs() - Initialize PF with user-created - * VFs. - * @dev: A PCIe device being realized. - * @offset: The offset of the SR-IOV capability. - * @errp: pointer to Error*, to store an error if it happens. - * - * Return: The size of added capability. 0 if the user did not create VFs. - * -1 if failed. - */ -int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, - uint16_t offset, - Error **errp); - -bool pcie_sriov_register_device(PCIDevice *dev, Error **errp); -void pcie_sriov_unregister_device(PCIDevice *dev); - /* * Default (minimal) page size support values * as required by the SR/IOV standard: From c8597d3e1c79a27cbf8ec3bed1a58a426f29ae21 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:20 -0400 Subject: [PATCH 06/19] Revert "pcie_sriov: Check PCI Express for SR-IOV PF" This reverts commit 47cc753e50076c25334091783738be9f716253b1. Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie_sriov.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 15a4aac1f4..6c79658b4c 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -42,11 +42,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, uint8_t *cfg = dev->config + offset; uint8_t *wmask; - if (!pci_is_express(dev)) { - error_setg(errp, "PCI Express is required for SR-IOV PF"); - return false; - } - if (pci_is_vf(dev)) { error_setg(errp, "a device cannot be both an SR-IOV PF and a VF"); return false; From da44479b1d79dd31eabaa865260ad3e679cdf2e1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:21 -0400 Subject: [PATCH 07/19] Revert "pcie_sriov: Ensure PF and VF are mutually exclusive" This reverts commit 78f9d7fd1989311040beff54979bcb2a1ba0aff2. Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie_sriov.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 6c79658b4c..56523ab4e8 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -42,11 +42,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, uint8_t *cfg = dev->config + offset; uint8_t *wmask; - if (pci_is_vf(dev)) { - error_setg(errp, "a device cannot be both an SR-IOV PF and a VF"); - return false; - } - if (total_vfs) { uint16_t ari_cap = pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI); uint16_t first_vf_devfn = dev->devfn + vf_offset; From 558452512f068bfb3584a5a8b15530b0c53074c7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:22 -0400 Subject: [PATCH 08/19] Revert "hw/pci: Fix SR-IOV VF number calculation" This reverts commit ca6dd3aef8a103138c99788bcba8195d4905ddc5. Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index cf2794879d..4c7be52951 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1437,11 +1437,7 @@ static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg, pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET); uint16_t vf_stride = pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE); - uint32_t vf_num = d->devfn - (pf->devfn + vf_offset); - - if (vf_num) { - vf_num /= vf_stride; - } + uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride; if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { new_addr = pci_get_quad(pf->config + bar); From b9ba81769499533488540b6d5ed7c7569476a427 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:38 -0400 Subject: [PATCH 09/19] Revert "pcie_sriov: Register VFs after migration" This reverts commit 107a64b9a360cf5ca046852bc03334f7a9f22aef. Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 7 ------- hw/pci/pcie_sriov.c | 7 ------- include/hw/pci/pcie_sriov.h | 2 -- 3 files changed, 16 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 4c7be52951..5c0050e178 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -733,17 +733,10 @@ static bool migrate_is_not_pcie(void *opaque, int version_id) return !pci_is_express((PCIDevice *)opaque); } -static int pci_post_load(void *opaque, int version_id) -{ - pcie_sriov_pf_post_load(opaque); - return 0; -} - const VMStateDescription vmstate_pci_device = { .name = "PCIDevice", .version_id = 2, .minimum_version_id = 1, - .post_load = pci_post_load, .fields = (const VMStateField[]) { VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice), VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice, diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 56523ab4e8..fae6acea4a 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -252,13 +252,6 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, } } -void pcie_sriov_pf_post_load(PCIDevice *dev) -{ - if (dev->exp.sriov_cap) { - register_vfs(dev); - } -} - /* Reset SR/IOV */ void pcie_sriov_pf_reset(PCIDevice *dev) diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index c5d2d318d3..5148c5b77d 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -57,8 +57,6 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize); void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, uint32_t val, int len); -void pcie_sriov_pf_post_load(PCIDevice *dev); - /* Reset SR/IOV */ void pcie_sriov_pf_reset(PCIDevice *dev); From ae9c192de7ab0f56f32d8b60b6568917e0138919 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:41 -0400 Subject: [PATCH 10/19] Revert "pcie_sriov: Remove num_vfs from PCIESriovPF" This reverts commit cbd9e5120bac3e292eee77b7a2e3692f235a1a26. Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie_sriov.c | 28 ++++++++-------------------- hw/pci/trace-events | 2 +- include/hw/pci/pcie_sriov.h | 1 + 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index fae6acea4a..9bd7f8acc3 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -57,6 +57,7 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, offset, PCI_EXT_CAP_SRIOV_SIZEOF); dev->exp.sriov_cap = offset; + dev->exp.sriov_pf.num_vfs = 0; dev->exp.sriov_pf.vf = NULL; pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); @@ -185,12 +186,6 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, } } -static void clear_ctrl_vfe(PCIDevice *dev) -{ - uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; - pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); -} - static void register_vfs(PCIDevice *dev) { uint16_t num_vfs; @@ -200,7 +195,6 @@ static void register_vfs(PCIDevice *dev) assert(sriov_cap > 0); num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { - clear_ctrl_vfe(dev); return; } @@ -209,18 +203,20 @@ static void register_vfs(PCIDevice *dev) for (i = 0; i < num_vfs; i++) { pci_set_enabled(dev->exp.sriov_pf.vf[i], true); } + dev->exp.sriov_pf.num_vfs = num_vfs; } static void unregister_vfs(PCIDevice *dev) { + uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; uint16_t i; - uint8_t *cfg = dev->config + dev->exp.sriov_cap; trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn)); - for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { + PCI_FUNC(dev->devfn), num_vfs); + for (i = 0; i < num_vfs; i++) { pci_set_enabled(dev->exp.sriov_pf.vf[i], false); } + dev->exp.sriov_pf.num_vfs = 0; } void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, @@ -246,9 +242,6 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, } else { unregister_vfs(dev); } - } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { - clear_ctrl_vfe(dev); - unregister_vfs(dev); } } @@ -311,7 +304,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev) PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) { assert(!pci_is_vf(dev)); - if (n < pcie_sriov_num_vfs(dev)) { + if (n < dev->exp.sriov_pf.num_vfs) { return dev->exp.sriov_pf.vf[n]; } return NULL; @@ -319,10 +312,5 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) uint16_t pcie_sriov_num_vfs(PCIDevice *dev) { - uint16_t sriov_cap = dev->exp.sriov_cap; - uint8_t *cfg = dev->config + sriov_cap; - - return sriov_cap && - (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ? - pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0; + return dev->exp.sriov_pf.num_vfs; } diff --git a/hw/pci/trace-events b/hw/pci/trace-events index e98f575a9d..19643aa8c6 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask # hw/pci/pcie_sriov.c sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs" -sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs" +sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs" sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d" # pcie.c diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index 5148c5b77d..70649236c1 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -16,6 +16,7 @@ #include "hw/pci/pci.h" typedef struct PCIESriovPF { + uint16_t num_vfs; /* Number of virtual functions created */ uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ } PCIESriovPF; From 9bab08da4e932e9c95919951792ae09d0a59f726 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:41 -0400 Subject: [PATCH 11/19] Revert "pcie_sriov: Release VFs failed to realize" This reverts commit 1a9bf009012e590cb166a4a9bae4bc18fb084d76. Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie_sriov.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 9bd7f8acc3..faadb0d2ea 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -99,8 +99,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, vf->exp.sriov_vf.vf_number = i; if (!qdev_realize(&vf->qdev, bus, errp)) { - object_unparent(OBJECT(vf)); - object_unref(vf); unparent_vfs(dev, i); return false; } From b1282f1e352db9947267a6524c6ded9678e82629 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:42 -0400 Subject: [PATCH 12/19] Revert "pcie_sriov: Reuse SR-IOV VF device instances" This reverts commit 139610ae67f6ecf92127bb7bf53ac6265b459ec8. Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 2 +- hw/pci/pcie_sriov.c | 95 +++++++++++++++++++++---------------- include/hw/pci/pci.h | 5 ++ include/hw/pci/pci_device.h | 15 ------ include/hw/pci/pcie_sriov.h | 1 + 5 files changed, 62 insertions(+), 56 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 5c0050e178..b532888e8f 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2895,7 +2895,7 @@ void pci_set_enabled(PCIDevice *d, bool state) memory_region_set_enabled(&d->bus_master_enable_region, (pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_MASTER) && d->enabled); - if (d->qdev.realized) { + if (!d->enabled) { pci_device_reset(d); } } diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index faadb0d2ea..f0bde0d3fc 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -20,16 +20,9 @@ #include "qapi/error.h" #include "trace.h" -static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs) -{ - for (uint16_t i = 0; i < total_vfs; i++) { - PCIDevice *vf = dev->exp.sriov_pf.vf[i]; - object_unparent(OBJECT(vf)); - object_unref(OBJECT(vf)); - } - g_free(dev->exp.sriov_pf.vf); - dev->exp.sriov_pf.vf = NULL; -} +static PCIDevice *register_vf(PCIDevice *pf, int devfn, + const char *name, uint16_t vf_num); +static void unregister_vfs(PCIDevice *dev); bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, const char *vfname, uint16_t vf_dev_id, @@ -37,8 +30,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, uint16_t vf_offset, uint16_t vf_stride, Error **errp) { - BusState *bus = qdev_get_parent_bus(&dev->qdev); - int32_t devfn = dev->devfn + vf_offset; uint8_t *cfg = dev->config + offset; uint8_t *wmask; @@ -58,6 +49,7 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, offset, PCI_EXT_CAP_SRIOV_SIZEOF); dev->exp.sriov_cap = offset; dev->exp.sriov_pf.num_vfs = 0; + dev->exp.sriov_pf.vfname = g_strdup(vfname); dev->exp.sriov_pf.vf = NULL; pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); @@ -91,34 +83,14 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, qdev_prop_set_bit(&dev->qdev, "multifunction", true); - dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs); - - for (uint16_t i = 0; i < total_vfs; i++) { - PCIDevice *vf = pci_new(devfn, vfname); - vf->exp.sriov_vf.pf = dev; - vf->exp.sriov_vf.vf_number = i; - - if (!qdev_realize(&vf->qdev, bus, errp)) { - unparent_vfs(dev, i); - return false; - } - - /* set vid/did according to sr/iov spec - they are not used */ - pci_config_set_vendor_id(vf->config, 0xffff); - pci_config_set_device_id(vf->config, 0xffff); - - dev->exp.sriov_pf.vf[i] = vf; - devfn += vf_stride; - } - return true; } void pcie_sriov_pf_exit(PCIDevice *dev) { - uint8_t *cfg = dev->config + dev->exp.sriov_cap; - - unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)); + unregister_vfs(dev); + g_free((char *)dev->exp.sriov_pf.vfname); + dev->exp.sriov_pf.vfname = NULL; } void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, @@ -184,11 +156,38 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, } } +static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, + uint16_t vf_num) +{ + PCIDevice *dev = pci_new(devfn, name); + dev->exp.sriov_vf.pf = pf; + dev->exp.sriov_vf.vf_number = vf_num; + PCIBus *bus = pci_get_bus(pf); + Error *local_err = NULL; + + qdev_realize(&dev->qdev, &bus->qbus, &local_err); + if (local_err) { + error_report_err(local_err); + return NULL; + } + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(dev->config, 0xffff); + pci_config_set_device_id(dev->config, 0xffff); + + return dev; +} + static void register_vfs(PCIDevice *dev) { uint16_t num_vfs; uint16_t i; uint16_t sriov_cap = dev->exp.sriov_cap; + uint16_t vf_offset = + pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET); + uint16_t vf_stride = + pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE); + int32_t devfn = dev->devfn + vf_offset; assert(sriov_cap > 0); num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); @@ -196,10 +195,18 @@ static void register_vfs(PCIDevice *dev) return; } + dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); + trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], true); + dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, + dev->exp.sriov_pf.vfname, i); + if (!dev->exp.sriov_pf.vf[i]) { + num_vfs = i; + break; + } + devfn += vf_stride; } dev->exp.sriov_pf.num_vfs = num_vfs; } @@ -212,8 +219,12 @@ static void unregister_vfs(PCIDevice *dev) trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { - pci_set_enabled(dev->exp.sriov_pf.vf[i], false); + PCIDevice *vf = dev->exp.sriov_pf.vf[i]; + object_unparent(OBJECT(vf)); + object_unref(OBJECT(vf)); } + g_free(dev->exp.sriov_pf.vf); + dev->exp.sriov_pf.vf = NULL; dev->exp.sriov_pf.num_vfs = 0; } @@ -235,10 +246,14 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, PCI_FUNC(dev->devfn), off, val, len); if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { - if (val & PCI_SRIOV_CTRL_VFE) { - register_vfs(dev); + if (dev->exp.sriov_pf.num_vfs) { + if (!(val & PCI_SRIOV_CTRL_VFE)) { + unregister_vfs(dev); + } } else { - unregister_vfs(dev); + if (val & PCI_SRIOV_CTRL_VFE) { + register_vfs(dev); + } } } } diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 14a869eeaa..fe04b4fafd 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -680,4 +680,9 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev) MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); void pci_set_enabled(PCIDevice *pci_dev, bool state); +static inline void pci_set_power(PCIDevice *pci_dev, bool state) +{ + pci_set_enabled(pci_dev, state); +} + #endif diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index 1ff3ce94e2..f38fb31119 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -212,21 +212,6 @@ static inline uint16_t pci_get_bdf(PCIDevice *dev) return PCI_BUILD_BDF(pci_bus_num(pci_get_bus(dev)), dev->devfn); } -static inline void pci_set_power(PCIDevice *pci_dev, bool state) -{ - /* - * Don't change the enabled state of VFs when powering on/off the device. - * - * When powering on, VFs must not be enabled immediately but they must - * wait until the guest configures SR-IOV. - * When powering off, their corresponding PFs will be reset and disable - * VFs. - */ - if (!pci_is_vf(pci_dev)) { - pci_set_enabled(pci_dev, state); - } -} - uint16_t pci_requester_id(PCIDevice *dev); /* DMA access functions */ diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index 70649236c1..aa704e8f9d 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -18,6 +18,7 @@ typedef struct PCIESriovPF { uint16_t num_vfs; /* Number of virtual functions created */ uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ + const char *vfname; /* Reference to the device type used for the VFs */ PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ } PCIESriovPF; From 19c45c00dc6a52f80f27dabbd28de1b770c16a89 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:43 -0400 Subject: [PATCH 13/19] Revert "pcie_sriov: Ensure VF function number does not overflow" This reverts commit 77718701157f6ca77ea7a57b536fa0a22f676082. Signed-off-by: Michael S. Tsirkin --- docs/pcie_sriov.txt | 8 +++----- hw/net/igb.c | 13 +++---------- hw/nvme/ctrl.c | 24 ++++++++---------------- hw/pci/pcie_sriov.c | 19 ++----------------- include/hw/pci/pcie_sriov.h | 5 ++--- 5 files changed, 18 insertions(+), 51 deletions(-) diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt index ab2142807f..a47aad0bfa 100644 --- a/docs/pcie_sriov.txt +++ b/docs/pcie_sriov.txt @@ -52,11 +52,9 @@ setting up a BAR for a VF. ... /* Add and initialize the SR/IOV capability */ - if (!pcie_sriov_pf_init(d, 0x200, "your_virtual_dev", - vf_devid, initial_vfs, total_vfs, - fun_offset, stride, errp)) { - return; - } + pcie_sriov_pf_init(d, 0x200, "your_virtual_dev", + vf_devid, initial_vfs, total_vfs, + fun_offset, stride); /* Set up individual VF BARs (parameters as for normal BARs) */ pcie_sriov_pf_init_vf_bar( ... ) diff --git a/hw/net/igb.c b/hw/net/igb.c index b6ca2f1b8a..b92bba402e 100644 --- a/hw/net/igb.c +++ b/hw/net/igb.c @@ -446,16 +446,9 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp) pcie_ari_init(pci_dev, 0x150); - if (!pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, - TYPE_IGBVF, IGB_82576_VF_DEV_ID, - IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS, - IGB_VF_OFFSET, IGB_VF_STRIDE, - errp)) { - pcie_cap_exit(pci_dev); - igb_cleanup_msix(s); - msi_uninit(pci_dev); - return; - } + pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF, + IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS, + IGB_VF_OFFSET, IGB_VF_STRIDE); pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX, PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index e86ea2e7ce..c6d4f61a47 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -8271,8 +8271,7 @@ out: return pow2ceil(bar_size); } -static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset, - Error **errp) +static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset) { uint16_t vf_dev_id = n->params.use_intel_id ? PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME; @@ -8281,17 +8280,12 @@ static bool nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset, le16_to_cpu(cap->vifrsm), NULL, NULL); - if (!pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, - n->params.sriov_max_vfs, n->params.sriov_max_vfs, - NVME_VF_OFFSET, NVME_VF_STRIDE, - errp)) { - return false; - } + pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, + n->params.sriov_max_vfs, n->params.sriov_max_vfs, + NVME_VF_OFFSET, NVME_VF_STRIDE); pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size); - - return true; } static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) @@ -8416,12 +8410,6 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) return false; } - if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs && - !nvme_init_sriov(n, pci_dev, 0x120, errp)) { - msix_uninit(pci_dev, &n->bar0, &n->bar0); - return false; - } - nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize); pcie_cap_deverr_init(pci_dev); @@ -8451,6 +8439,10 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) nvme_init_pmr(n, pci_dev); } + if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) { + nvme_init_sriov(n, pci_dev, 0x120); + } + return true; } diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index f0bde0d3fc..499becd527 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -24,27 +24,14 @@ static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, uint16_t vf_num); static void unregister_vfs(PCIDevice *dev); -bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, +void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, const char *vfname, uint16_t vf_dev_id, uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride, - Error **errp) + uint16_t vf_offset, uint16_t vf_stride) { uint8_t *cfg = dev->config + offset; uint8_t *wmask; - if (total_vfs) { - uint16_t ari_cap = pcie_find_capability(dev, PCI_EXT_CAP_ID_ARI); - uint16_t first_vf_devfn = dev->devfn + vf_offset; - uint16_t last_vf_devfn = first_vf_devfn + vf_stride * (total_vfs - 1); - - if ((!ari_cap && PCI_SLOT(dev->devfn) != PCI_SLOT(last_vf_devfn)) || - last_vf_devfn >= PCI_DEVFN_MAX) { - error_setg(errp, "VF function number overflows"); - return false; - } - } - pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, offset, PCI_EXT_CAP_SRIOV_SIZEOF); dev->exp.sriov_cap = offset; @@ -82,8 +69,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553); qdev_prop_set_bit(&dev->qdev, "multifunction", true); - - return true; } void pcie_sriov_pf_exit(PCIDevice *dev) diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index aa704e8f9d..450cbef6c2 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -27,11 +27,10 @@ typedef struct PCIESriovVF { uint16_t vf_number; /* Logical VF number of this function */ } PCIESriovVF; -bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, +void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, const char *vfname, uint16_t vf_dev_id, uint16_t init_vfs, uint16_t total_vfs, - uint16_t vf_offset, uint16_t vf_stride, - Error **errp); + uint16_t vf_offset, uint16_t vf_stride); void pcie_sriov_pf_exit(PCIDevice *dev); /* Set up a VF bar in the SR/IOV bar area */ From b0fdaee5d1ed52f650ca0c3bebfce3dd4dc8ddb5 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:45 -0400 Subject: [PATCH 14/19] Revert "pcie_sriov: Do not manually unrealize" This reverts commit c613ad25125bf3016aa8f81ce170f5ac91d2379f. Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie_sriov.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 499becd527..e9b23221d7 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -204,7 +204,11 @@ static void unregister_vfs(PCIDevice *dev) trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), num_vfs); for (i = 0; i < num_vfs; i++) { + Error *err = NULL; PCIDevice *vf = dev->exp.sriov_pf.vf[i]; + if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) { + error_reportf_err(err, "Failed to unplug: "); + } object_unparent(OBJECT(vf)); object_unref(OBJECT(vf)); } From 47279e8afa84cb28e84e3ac4392487b94a494fa9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:47 -0400 Subject: [PATCH 15/19] Revert "hw/ppc/spapr_pci: Do not reject VFs created after a PF" This reverts commit 26f86093ec989cb73ad03e8a234f5dc321e1e267. Signed-off-by: Michael S. Tsirkin --- hw/ppc/spapr_pci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index ed4454bbf7..f63182a03c 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1573,9 +1573,7 @@ static void spapr_pci_pre_plug(HotplugHandler *plug_handler, * hotplug, we do not allow functions to be hotplugged to a * slot that already has function 0 present */ - if (plugged_dev->hotplugged && - !pci_is_vf(pdev) && - bus->devices[PCI_DEVFN(slotnr, 0)] && + if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] && PCI_FUNC(pdev->devfn) != 0) { error_setg(errp, "PCI: slot %d function 0 already occupied by %s," " additional functions can no longer be exposed to guest.", From f1feffc4ef4b1c02daa5ce23fa8ad728c8ef7ce9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:49 -0400 Subject: [PATCH 16/19] Revert "hw/ppc/spapr_pci: Do not create DT for disabled PCI device" This reverts commit 723c5b4628d047e43825a046c6ee517b82b88117. Signed-off-by: Michael S. Tsirkin --- hw/ppc/spapr_pci.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index f63182a03c..7cf9904c35 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1296,10 +1296,6 @@ static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev, return; } - if (!pdev->enabled) { - return; - } - err = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->offset); if (err < 0) { p->err = err; From 93829009a685687e2fb48158cb5df18c4eb49d07 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 1 Aug 2024 03:44:50 -0400 Subject: [PATCH 17/19] Revert "hw/pci: Rename has_power to enabled" This reverts commit 6a31b219a5338564f3978251c79f96f689e037da. Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 14 +++++++------- hw/pci/pci_host.c | 4 ++-- include/hw/pci/pci.h | 7 +------ include/hw/pci/pci_device.h | 2 +- 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index b532888e8f..fab86d0567 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1525,7 +1525,7 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); - if (!d->enabled) { + if (!d->has_power) { new_addr = PCI_BAR_UNMAPPED; } @@ -1613,7 +1613,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int pci_update_irq_disabled(d, was_irq_disabled); memory_region_set_enabled(&d->bus_master_enable_region, (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->enabled); + & PCI_COMMAND_MASTER) && d->has_power); } msi_write_config(d, addr, val_in, l); @@ -2884,18 +2884,18 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) return msg; } -void pci_set_enabled(PCIDevice *d, bool state) +void pci_set_power(PCIDevice *d, bool state) { - if (d->enabled == state) { + if (d->has_power == state) { return; } - d->enabled = state; + d->has_power = state; pci_update_mappings(d); memory_region_set_enabled(&d->bus_master_enable_region, (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->enabled); - if (!d->enabled) { + & PCI_COMMAND_MASTER) && d->has_power); + if (!d->has_power) { pci_device_reset(d); } } diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index 0d82727cc9..dfe6fe6184 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, * allowing direct removal of unexposed functions. */ if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || - !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) { + !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { return; } @@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, * allowing direct removal of unexposed functions. */ if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || - !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) { + !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { return ~0x0; } diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index fe04b4fafd..eb26cac810 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -678,11 +678,6 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev) } MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); -void pci_set_enabled(PCIDevice *pci_dev, bool state); - -static inline void pci_set_power(PCIDevice *pci_dev, bool state) -{ - pci_set_enabled(pci_dev, state); -} +void pci_set_power(PCIDevice *pci_dev, bool state); #endif diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index f38fb31119..15694f2489 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -57,7 +57,7 @@ typedef struct PCIReqIDCache PCIReqIDCache; struct PCIDevice { DeviceState qdev; bool partially_hotplugged; - bool enabled; + bool has_power; /* PCI config space */ uint8_t *config; From 9a45b0761628cc59267b3283a85d15294464ac31 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Wed, 31 Jul 2024 18:00:19 +0100 Subject: [PATCH 18/19] hw/i386/amd_iommu: Don't leak memory in amdvi_update_iotlb() In amdvi_update_iotlb() we will only put a new entry in the hash table if to_cache.perm is not IOMMU_NONE. However we allocate the memory for the new AMDVIIOTLBEntry and for the hash table key regardless. This means that in the IOMMU_NONE case we will leak the memory we alloacted. Move the allocations into the if() to the point where we know we're going to add the item to the hash table. Cc: qemu-stable@nongnu.org Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2452 Signed-off-by: Peter Maydell Message-Id: <20240731170019.3590563-1-peter.maydell@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/amd_iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 6d4fde72f9..87643d2891 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -357,12 +357,12 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid, uint64_t gpa, IOMMUTLBEntry to_cache, uint16_t domid) { - AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1); - uint64_t *key = g_new(uint64_t, 1); - uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K; - /* don't cache erroneous translations */ if (to_cache.perm != IOMMU_NONE) { + AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1); + uint64_t *key = g_new(uint64_t, 1); + uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K; + trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), gpa, to_cache.translated_addr); From 515457757ff8540c524ff39ea1d9564b251c6532 Mon Sep 17 00:00:00 2001 From: yeeli Date: Thu, 25 Jul 2024 11:18:58 +0800 Subject: [PATCH 19/19] intel_iommu: Fix for IQA reg read dropped DW field If VT-D hardware supports scalable mode, Linux will set the IQA DW field (bit11). In qemu, the vtd_mem_write and vtd_update_iq_dw set DW field well. However, vtd_mem_read the DW field wrong because "& VTD_IQA_QS" dropped the value of DW. Replace "&VTD_IQA_QS" with "& (VTD_IQA_QS | VTD_IQA_DW_MASK)" could save the DW field. Test patch as below: config the "x-scalable-mode" option: "-device intel-iommu,caching-mode=on,x-scalable-mode=on,aw-bits=48" After Linux OS boot, check the IQA_REG DW Field by usage 1 or 2: 1. IOMMU_DEBUGFS: Before fix: cat /sys/kernel/debug/iommu/intel/iommu_regset |grep IQA IQA 0x90 0x00000001001da001 After fix: cat /sys/kernel/debug/iommu/intel/iommu_regset |grep IQA IQA 0x90 0x00000001001da801 Check DW field(bit11) is 1. 2. devmem2 read the IQA_REG (offset 0x90): Before fix: devmem2 0xfed90090 /dev/mem opened. Memory mapped at address 0x7f72c795b000. Value at address 0xFED90090 (0x7f72c795b090): 0x1DA001 After fix: devmem2 0xfed90090 /dev/mem opened. Memory mapped at address 0x7fc95281c000. Value at address 0xFED90090 (0x7fc95281c090): 0x1DA801 Check DW field(bit11) is 1. Signed-off-by: yeeli Message-Id: <20240725031858.1529902-1-seven.yi.lee@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 9a768f0b44..16d2885fcc 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2947,7 +2947,9 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size) /* Invalidation Queue Address Register, 64-bit */ case DMAR_IQA_REG: - val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS); + val = s->iq | + (vtd_get_quad(s, DMAR_IQA_REG) & + (VTD_IQA_QS | VTD_IQA_DW_MASK)); if (size == 4) { val = val & ((1ULL << 32) - 1); }