From 6e7998ceb9008e82501c7cf069e5552c7e352c6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 7 Jan 2025 14:06:04 +0100 Subject: [PATCH 01/12] vfio/igd: Fix potential overflow in igd_gtt_memory_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The risk is mainly theoretical since the applied bit mask will keep the 'ggms' shift value below 3. Nevertheless, let's use a 64 bit integer type and resolve the coverity issue. Resolves: Coverity CID 1585908 Fixes: 1e1eac5f3dcd ("vfio/igd: canonicalize memory size calculations") Reviewed-by: Alex Williamson Link: https://lore.kernel.org/r/20250107130604.669697-1-clg@redhat.com Signed-off-by: Cédric Le Goater --- hw/vfio/igd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index 0740a5dd8c..b5303ea565 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -133,7 +133,7 @@ static uint64_t igd_gtt_memory_size(int gen, uint16_t gmch) } else { ggms = (gmch >> IGD_GMCH_GEN8_GGMS_SHIFT) & IGD_GMCH_GEN8_GGMS_MASK; if (ggms != 0) { - ggms = 1 << ggms; + ggms = 1ULL << ggms; } } From dee69a8ca6ebc325939dc0b6800becdb26cd9dbf Mon Sep 17 00:00:00 2001 From: Tomita Moeko Date: Sat, 4 Jan 2025 23:42:16 +0800 Subject: [PATCH 02/12] vfio/pci: declare generic quirks in a new header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Declare generic vfio_generic_{window_address,window_data,mirror}_quirk in newly created pci_quirks.h so that they can be used elsewhere, like igd.c. Signed-off-by: Tomita Moeko Reviewed-by: Alex Williamson Link: https://lore.kernel.org/r/20250104154219.7209-2-tomitamoeko@gmail.com Signed-off-by: Cédric Le Goater --- hw/vfio/pci-quirks.c | 55 +++------------------------------- hw/vfio/pci-quirks.h | 71 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 51 deletions(-) create mode 100644 hw/vfio/pci-quirks.h diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index c8e60475d5..2258fdda5f 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -25,6 +25,7 @@ #include "hw/nvram/fw_cfg.h" #include "hw/qdev-properties.h" #include "pci.h" +#include "pci-quirks.h" #include "trace.h" /* @@ -66,40 +67,6 @@ bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev) * Device specific region quirks (mostly backdoors to PCI config space) */ -/* - * The generic window quirks operate on an address and data register, - * vfio_generic_window_address_quirk handles the address register and - * vfio_generic_window_data_quirk handles the data register. These ops - * pass reads and writes through to hardware until a value matching the - * stored address match/mask is written. When this occurs, the data - * register access emulated PCI config space for the device rather than - * passing through accesses. This enables devices where PCI config space - * is accessible behind a window register to maintain the virtualization - * provided through vfio. - */ -typedef struct VFIOConfigWindowMatch { - uint32_t match; - uint32_t mask; -} VFIOConfigWindowMatch; - -typedef struct VFIOConfigWindowQuirk { - struct VFIOPCIDevice *vdev; - - uint32_t address_val; - - uint32_t address_offset; - uint32_t data_offset; - - bool window_enabled; - uint8_t bar; - - MemoryRegion *addr_mem; - MemoryRegion *data_mem; - - uint32_t nr_matches; - VFIOConfigWindowMatch matches[]; -} VFIOConfigWindowQuirk; - static uint64_t vfio_generic_window_quirk_address_read(void *opaque, hwaddr addr, unsigned size) @@ -135,7 +102,7 @@ static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr, } } -static const MemoryRegionOps vfio_generic_window_address_quirk = { +const MemoryRegionOps vfio_generic_window_address_quirk = { .read = vfio_generic_window_quirk_address_read, .write = vfio_generic_window_quirk_address_write, .endianness = DEVICE_LITTLE_ENDIAN, @@ -178,26 +145,12 @@ static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr, addr + window->data_offset, data, size); } -static const MemoryRegionOps vfio_generic_window_data_quirk = { +const MemoryRegionOps vfio_generic_window_data_quirk = { .read = vfio_generic_window_quirk_data_read, .write = vfio_generic_window_quirk_data_write, .endianness = DEVICE_LITTLE_ENDIAN, }; -/* - * The generic mirror quirk handles devices which expose PCI config space - * through a region within a BAR. When enabled, reads and writes are - * redirected through to emulated PCI config space. XXX if PCI config space - * used memory regions, this could just be an alias. - */ -typedef struct VFIOConfigMirrorQuirk { - struct VFIOPCIDevice *vdev; - uint32_t offset; - uint8_t bar; - MemoryRegion *mem; - uint8_t data[]; -} VFIOConfigMirrorQuirk; - static uint64_t vfio_generic_quirk_mirror_read(void *opaque, hwaddr addr, unsigned size) { @@ -228,7 +181,7 @@ static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr, addr, data); } -static const MemoryRegionOps vfio_generic_mirror_quirk = { +const MemoryRegionOps vfio_generic_mirror_quirk = { .read = vfio_generic_quirk_mirror_read, .write = vfio_generic_quirk_mirror_write, .endianness = DEVICE_LITTLE_ENDIAN, diff --git a/hw/vfio/pci-quirks.h b/hw/vfio/pci-quirks.h new file mode 100644 index 0000000000..c0e96a01cc --- /dev/null +++ b/hw/vfio/pci-quirks.h @@ -0,0 +1,71 @@ +/* + * vfio generic region quirks (mostly backdoors to PCI config space) + * + * Copyright Red Hat, Inc. 2012-2015 + * + * Authors: + * Alex Williamson + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#ifndef HW_VFIO_VFIO_PCI_QUIRKS_H +#define HW_VFIO_VFIO_PCI_QUIRKS_H + +#include "qemu/osdep.h" +#include "exec/memop.h" + +/* + * The generic window quirks operate on an address and data register, + * vfio_generic_window_address_quirk handles the address register and + * vfio_generic_window_data_quirk handles the data register. These ops + * pass reads and writes through to hardware until a value matching the + * stored address match/mask is written. When this occurs, the data + * register access emulated PCI config space for the device rather than + * passing through accesses. This enables devices where PCI config space + * is accessible behind a window register to maintain the virtualization + * provided through vfio. + */ +typedef struct VFIOConfigWindowMatch { + uint32_t match; + uint32_t mask; +} VFIOConfigWindowMatch; + +typedef struct VFIOConfigWindowQuirk { + struct VFIOPCIDevice *vdev; + + uint32_t address_val; + + uint32_t address_offset; + uint32_t data_offset; + + bool window_enabled; + uint8_t bar; + + MemoryRegion *addr_mem; + MemoryRegion *data_mem; + + uint32_t nr_matches; + VFIOConfigWindowMatch matches[]; +} VFIOConfigWindowQuirk; + +extern const MemoryRegionOps vfio_generic_window_address_quirk; +extern const MemoryRegionOps vfio_generic_window_data_quirk; + +/* + * The generic mirror quirk handles devices which expose PCI config space + * through a region within a BAR. When enabled, reads and writes are + * redirected through to emulated PCI config space. XXX if PCI config space + * used memory regions, this could just be an alias. + */ +typedef struct VFIOConfigMirrorQuirk { + struct VFIOPCIDevice *vdev; + uint32_t offset; + uint8_t bar; + MemoryRegion *mem; + uint8_t data[]; +} VFIOConfigMirrorQuirk; + +extern const MemoryRegionOps vfio_generic_mirror_quirk; + +#endif /* HW_VFIO_VFIO_PCI_QUIRKS_H */ From f36e7ba9750cbeffdc465c2d195a232d7cb67555 Mon Sep 17 00:00:00 2001 From: Tomita Moeko Date: Sat, 4 Jan 2025 23:42:17 +0800 Subject: [PATCH 03/12] vfio/pci: introduce config_offset field in VFIOConfigMirrorQuirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Device may only expose a specific portion of PCI config space through a region in a BAR, such behavior is seen in igd GGC and BDSM mirrors in BAR0. To handle these, config_offset is introduced to allow mirroring arbitrary region in PCI config space. Signed-off-by: Tomita Moeko Reviewed-by: Alex Williamson Link: https://lore.kernel.org/r/20250104154219.7209-3-tomitamoeko@gmail.com Signed-off-by: Cédric Le Goater --- hw/vfio/pci-quirks.c | 2 ++ hw/vfio/pci-quirks.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 2258fdda5f..fbe43b0a79 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -162,6 +162,7 @@ static uint64_t vfio_generic_quirk_mirror_read(void *opaque, (void)vfio_region_read(&vdev->bars[mirror->bar].region, addr + mirror->offset, size); + addr += mirror->config_offset; data = vfio_pci_read_config(&vdev->pdev, addr, size); trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name, memory_region_name(mirror->mem), @@ -175,6 +176,7 @@ static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr, VFIOConfigMirrorQuirk *mirror = opaque; VFIOPCIDevice *vdev = mirror->vdev; + addr += mirror->config_offset; vfio_pci_write_config(&vdev->pdev, addr, data, size); trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name, memory_region_name(mirror->mem), diff --git a/hw/vfio/pci-quirks.h b/hw/vfio/pci-quirks.h index c0e96a01cc..d1532e379b 100644 --- a/hw/vfio/pci-quirks.h +++ b/hw/vfio/pci-quirks.h @@ -60,7 +60,8 @@ extern const MemoryRegionOps vfio_generic_window_data_quirk; */ typedef struct VFIOConfigMirrorQuirk { struct VFIOPCIDevice *vdev; - uint32_t offset; + uint32_t offset; /* Offset in BAR */ + uint32_t config_offset; /* Offset in PCI config space */ uint8_t bar; MemoryRegion *mem; uint8_t data[]; From e45891e0b17884b7170c441eab3a5cdd856946f7 Mon Sep 17 00:00:00 2001 From: Tomita Moeko Date: Sat, 4 Jan 2025 23:42:18 +0800 Subject: [PATCH 04/12] vfio/igd: use VFIOConfigMirrorQuirk for mirrored registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the introduction of config_offset field, VFIOConfigMirrorQuirk can now be used for those mirrored register in igd bar0. This eliminates the need for the macro intoduced in 1a2623b5c9e7 ("vfio/igd: add macro for declaring mirrored registers"). Signed-off-by: Tomita Moeko Reviewed-by: Alex Williamson Link: https://lore.kernel.org/r/20250104154219.7209-4-tomitamoeko@gmail.com Signed-off-by: Cédric Le Goater --- hw/vfio/igd.c | 125 +++++++++++++------------------------------------- 1 file changed, 31 insertions(+), 94 deletions(-) diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index b5303ea565..b1a237edd6 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -18,6 +18,7 @@ #include "hw/hw.h" #include "hw/nvram/fw_cfg.h" #include "pci.h" +#include "pci-quirks.h" #include "trace.h" /* @@ -422,83 +423,13 @@ static const MemoryRegionOps vfio_igd_index_quirk = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static uint64_t vfio_igd_pci_config_read(VFIOPCIDevice *vdev, uint64_t offset, - unsigned size) -{ - switch (size) { - case 1: - return pci_get_byte(vdev->pdev.config + offset); - case 2: - return pci_get_word(vdev->pdev.config + offset); - case 4: - return pci_get_long(vdev->pdev.config + offset); - case 8: - return pci_get_quad(vdev->pdev.config + offset); - default: - hw_error("igd: unsupported pci config read at %"PRIx64", size %u", - offset, size); - break; - } - - return 0; -} - -static void vfio_igd_pci_config_write(VFIOPCIDevice *vdev, uint64_t offset, - uint64_t data, unsigned size) -{ - switch (size) { - case 1: - pci_set_byte(vdev->pdev.config + offset, data); - break; - case 2: - pci_set_word(vdev->pdev.config + offset, data); - break; - case 4: - pci_set_long(vdev->pdev.config + offset, data); - break; - case 8: - pci_set_quad(vdev->pdev.config + offset, data); - break; - default: - hw_error("igd: unsupported pci config write at %"PRIx64", size %u", - offset, size); - break; - } -} - -#define VFIO_IGD_QUIRK_MIRROR_REG(reg, name) \ -static uint64_t vfio_igd_quirk_read_##name(void *opaque, \ - hwaddr addr, unsigned size) \ -{ \ - VFIOPCIDevice *vdev = opaque; \ - \ - return vfio_igd_pci_config_read(vdev, reg + addr, size); \ -} \ - \ -static void vfio_igd_quirk_write_##name(void *opaque, hwaddr addr, \ - uint64_t data, unsigned size) \ -{ \ - VFIOPCIDevice *vdev = opaque; \ - \ - vfio_igd_pci_config_write(vdev, reg + addr, data, size); \ -} \ - \ -static const MemoryRegionOps vfio_igd_quirk_mirror_##name = { \ - .read = vfio_igd_quirk_read_##name, \ - .write = vfio_igd_quirk_write_##name, \ - .endianness = DEVICE_LITTLE_ENDIAN, \ -}; - -VFIO_IGD_QUIRK_MIRROR_REG(IGD_GMCH, ggc) -VFIO_IGD_QUIRK_MIRROR_REG(IGD_BDSM, bdsm) -VFIO_IGD_QUIRK_MIRROR_REG(IGD_BDSM_GEN11, bdsm64) - #define IGD_GGC_MMIO_OFFSET 0x108040 #define IGD_BDSM_MMIO_OFFSET 0x1080C0 void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) { - VFIOQuirk *quirk; + VFIOQuirk *ggc_quirk, *bdsm_quirk; + VFIOConfigMirrorQuirk *ggc_mirror, *bdsm_mirror; int gen; /* @@ -522,33 +453,39 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) return; } - quirk = vfio_quirk_alloc(2); - quirk->data = vdev; + ggc_quirk = vfio_quirk_alloc(1); + ggc_mirror = ggc_quirk->data = g_malloc0(sizeof(*ggc_mirror)); + ggc_mirror->mem = ggc_quirk->mem; + ggc_mirror->vdev = vdev; + ggc_mirror->bar = nr; + ggc_mirror->offset = IGD_GGC_MMIO_OFFSET; + ggc_mirror->config_offset = IGD_GMCH; - memory_region_init_io(&quirk->mem[0], OBJECT(vdev), - &vfio_igd_quirk_mirror_ggc, vdev, + memory_region_init_io(ggc_mirror->mem, OBJECT(vdev), + &vfio_generic_mirror_quirk, ggc_mirror, "vfio-igd-ggc-quirk", 2); - memory_region_add_subregion_overlap(vdev->bars[0].region.mem, - IGD_GGC_MMIO_OFFSET, &quirk->mem[0], + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, + ggc_mirror->offset, ggc_mirror->mem, 1); - if (gen < 11) { - memory_region_init_io(&quirk->mem[1], OBJECT(vdev), - &vfio_igd_quirk_mirror_bdsm, vdev, - "vfio-igd-bdsm-quirk", 4); - memory_region_add_subregion_overlap(vdev->bars[0].region.mem, - IGD_BDSM_MMIO_OFFSET, - &quirk->mem[1], 1); - } else { - memory_region_init_io(&quirk->mem[1], OBJECT(vdev), - &vfio_igd_quirk_mirror_bdsm64, vdev, - "vfio-igd-bdsm-quirk", 8); - memory_region_add_subregion_overlap(vdev->bars[0].region.mem, - IGD_BDSM_MMIO_OFFSET, - &quirk->mem[1], 1); - } + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, ggc_quirk, next); - QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + bdsm_quirk = vfio_quirk_alloc(1); + bdsm_mirror = bdsm_quirk->data = g_malloc0(sizeof(*bdsm_mirror)); + bdsm_mirror->mem = bdsm_quirk->mem; + bdsm_mirror->vdev = vdev; + bdsm_mirror->bar = nr; + bdsm_mirror->offset = IGD_BDSM_MMIO_OFFSET; + bdsm_mirror->config_offset = (gen < 11) ? IGD_BDSM : IGD_BDSM_GEN11; + + memory_region_init_io(bdsm_mirror->mem, OBJECT(vdev), + &vfio_generic_mirror_quirk, bdsm_mirror, + "vfio-igd-bdsm-quirk", (gen < 11) ? 4 : 8); + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, + bdsm_mirror->offset, bdsm_mirror->mem, + 1); + + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); } void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) From 7b3d5b84cbd742356a1afc6b0fa489d0663f235d Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 16 Jan 2025 18:23:07 +0800 Subject: [PATCH 05/12] vfio/iommufd: Fix SIGSEV in iommufd_cdev_attach() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When iommufd_cdev_ram_block_discard_disable() fails for whatever reason, errp should be set or else SIGSEV is triggered in vfio_realize() when error_prepend() is called. By this chance, use the same error message for both legacy and iommufd backend. Fixes: 5ee3dc7af785 ("vfio/iommufd: Implement the iommufd backend") Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20250116102307.260849-1-zhenzhong.duan@intel.com Signed-off-by: Cédric Le Goater --- hw/vfio/iommufd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 3490a8f1eb..df61edffc0 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -515,8 +515,8 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, } else { ret = iommufd_cdev_ram_block_discard_disable(true); if (ret) { - error_setg(errp, - "Cannot set discarding of RAM broken (%d)", ret); + error_setg_errno(errp, -ret, + "Cannot set discarding of RAM broken"); goto err_discard_disable; } goto found_container; @@ -544,6 +544,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, ret = iommufd_cdev_ram_block_discard_disable(true); if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); goto err_discard_disable; } From 96b339cc4cc7e3d2da0e615e1851d3250b526ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 6 Feb 2025 14:14:29 +0100 Subject: [PATCH 06/12] util/error: Introduce warn_report_err_once() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Depending on the configuration of the host and VM, a passthrough device may generate recurring DMA mapping errors at runtime. In such cases, reporting the issue once is sufficient. We have already the warn/error_report_once() routines taking a format and arguments. Using the same design pattern, add a new warning variant taking an 'Error *' parameter. Cc: Markus Armbruster Reviewed-by: Alex Williamson Reviewed-by: Markus Armbruster Link: https://lore.kernel.org/qemu-devel/20250206131438.1505542-2-clg@redhat.com Signed-off-by: Cédric Le Goater --- include/qapi/error.h | 12 ++++++++++++ util/error.c | 11 +++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/qapi/error.h b/include/qapi/error.h index 71f8fb2c50..f5fe216262 100644 --- a/include/qapi/error.h +++ b/include/qapi/error.h @@ -466,6 +466,18 @@ void warn_reportf_err(Error *err, const char *fmt, ...) void error_reportf_err(Error *err, const char *fmt, ...) G_GNUC_PRINTF(2, 3); +/* + * Similar to warn_report_err(), except it prints the message just once. + * Return true when it prints, false otherwise. + */ +bool warn_report_err_once_cond(bool *printed, Error *err); + +#define warn_report_err_once(err) \ + ({ \ + static bool print_once_; \ + warn_report_err_once_cond(&print_once_, err); \ + }) + /* * Just like error_setg(), except you get to specify the error class. * Note: use of error classes other than ERROR_CLASS_GENERIC_ERROR is diff --git a/util/error.c b/util/error.c index e5e247209a..673011b89e 100644 --- a/util/error.c +++ b/util/error.c @@ -247,6 +247,17 @@ void warn_report_err(Error *err) error_free(err); } +bool warn_report_err_once_cond(bool *printed, Error *err) +{ + if (*printed) { + error_free(err); + return false; + } + *printed = true; + warn_report_err(err); + return true; +} + void error_reportf_err(Error *err, const char *fmt, ...) { va_list ap; From cbfbedd6173cf8ff3f374ec6b8e1a17e9fa75872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 6 Feb 2025 14:14:30 +0100 Subject: [PATCH 07/12] vfio/pci: Replace "iommu_device" by "vIOMMU" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is to be consistent with other reported errors related to vIOMMU devices. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Alex Williamson Link: https://lore.kernel.org/qemu-devel/20250206131438.1505542-3-clg@redhat.com Signed-off-by: Cédric Le Goater --- hw/vfio/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 9a55e7b773..89d900e9cf 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3116,7 +3116,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (!vbasedev->mdev && !pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { - error_prepend(errp, "Failed to set iommu_device: "); + error_prepend(errp, "Failed to set vIOMMU: "); goto out_teardown; } From cdc6f2e0c9fcef2027f4fc5f25ddd15e36283e90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 6 Feb 2025 14:14:31 +0100 Subject: [PATCH 08/12] vfio: Rephrase comment in vfio_listener_region_add() error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rephrase a bit the ending comment about how errors are handled depending on the phase in which vfio_listener_region_add() is called. Reviewed-by: Alex Williamson Link: https://lore.kernel.org/qemu-devel/20250206131438.1505542-4-clg@redhat.com Signed-off-by: Cédric Le Goater --- hw/vfio/common.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index f7499a9b74..62af1216fc 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -683,12 +683,13 @@ fail: error_reportf_err(err, "PCI p2p may not work: "); return; } - /* - * On the initfn path, store the first error in the container so we - * can gracefully fail. Runtime, there's not much we can do other - * than throw a hardware error. - */ + if (!bcontainer->initialized) { + /* + * At machine init time or when the device is attached to the + * VM, store the first error in the container so we can + * gracefully fail the device realize routine. + */ if (!bcontainer->error) { error_propagate_prepend(&bcontainer->error, err, "Region %s: ", @@ -697,6 +698,10 @@ fail: error_free(err); } } else { + /* + * At runtime, there's not much we can do other than throw a + * hardware error. + */ error_report_err(err); hw_error("vfio: DMA mapping failed, unable to continue"); } From 80936cf7f38e55f41bf5b482ee04f055c76a0df0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 6 Feb 2025 14:14:32 +0100 Subject: [PATCH 09/12] vfio: Introduce vfio_get_vfio_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helper will be useful in the listener handlers to extract the VFIO device from a memory region using memory_region_owner(). At the moment, we only care for PCI passthrough devices. If the need arises, we will add more. Reviewed-by: Alex Williamson Link: https://lore.kernel.org/qemu-devel/20250206131438.1505542-5-clg@redhat.com Signed-off-by: Cédric Le Goater --- hw/vfio/helpers.c | 10 ++++++++++ include/hw/vfio/vfio-common.h | 1 + 2 files changed, 11 insertions(+) diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 913796f437..4b255d4f3a 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -23,6 +23,7 @@ #include #include "hw/vfio/vfio-common.h" +#include "hw/vfio/pci.h" #include "hw/hw.h" #include "trace.h" #include "qapi/error.h" @@ -728,3 +729,12 @@ bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp) return HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp); } + +VFIODevice *vfio_get_vfio_device(Object *obj) +{ + if (object_dynamic_cast(obj, TYPE_VFIO_PCI)) { + return &VFIO_PCI(obj)->vbasedev; + } else { + return NULL; + } +} diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 0c60be5b15..ac35136a11 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -252,6 +252,7 @@ bool vfio_device_hiod_realize(VFIODevice *vbasedev, Error **errp); bool vfio_attach_device(char *name, VFIODevice *vbasedev, AddressSpace *as, Error **errp); void vfio_detach_device(VFIODevice *vbasedev); +VFIODevice *vfio_get_vfio_device(Object *obj); int vfio_kvm_device_add_fd(int fd, Error **errp); int vfio_kvm_device_del_fd(int fd, Error **errp); From aaec00422ba6538ca3e6bfa612bbae155ec15c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 6 Feb 2025 14:14:33 +0100 Subject: [PATCH 10/12] vfio: Improve error reporting when MMIO region mapping fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the IOMMU address space width is smaller than the physical address width, a MMIO region of a device can fail to map because the region is outside the supported IOVA ranges of the VM. In this case, PCI peer-to-peer transactions on BARs are not supported. This can occur with the 39-bit IOMMU address space width, as can be the case on some Intel consumer processors, or when using a vIOMMU device with default settings. The current error message is unclear, improve it and also change the error report to a warning because it is a non fatal condition for the VM. To prevent excessive log messages, restrict these recurring DMA mapping errors to a single warning at runtime. Reviewed-by: Alex Williamson Link: https://lore.kernel.org/qemu-devel/20250206131438.1505542-6-clg@redhat.com Signed-off-by: Cédric Le Goater --- hw/vfio/common.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 62af1216fc..4fca4c6166 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -555,6 +555,18 @@ static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer, return true; } +static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp) +{ + /* + * MMIO region mapping failures are not fatal but in this case PCI + * peer-to-peer transactions are broken. + */ + if (vbasedev && vbasedev->type == VFIO_DEVICE_TYPE_PCI) { + error_append_hint(errp, "%s: PCI peer-to-peer transactions " + "on BARs are not supported.\n", vbasedev->name); + } +} + static void vfio_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { @@ -670,7 +682,10 @@ static void vfio_listener_region_add(MemoryListener *listener, strerror(-ret)); if (memory_region_is_ram_device(section->mr)) { /* Allow unexpected mappings not to be fatal for RAM devices */ - error_report_err(err); + VFIODevice *vbasedev = + vfio_get_vfio_device(memory_region_owner(section->mr)); + vfio_device_error_append(vbasedev, &err); + warn_report_err_once(err); return; } goto fail; From 889695f8f3c10647cc27a3788b4a3f1d0192926c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 6 Feb 2025 14:14:34 +0100 Subject: [PATCH 11/12] vfio: Remove reports of DMA mapping errors in backends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the mapping handlers of the IOMMU backends, VFIO IOMMU Type 1 aka. legacy and IOMMUFD, return an errno and also report an error. This can lead to excessive log messages at runtime for recurring DMA mapping errors. Since these errors are already reported by the callers in the vfio_container_dma_un/map() routines, simply remove them and allow the callers to handle the reporting. The mapping handler of the IOMMUFD backend has a comment suggesting MMIO region mapping failures return EFAULT. I am not sure this is entirely true, so keep the EFAULT case until the conditions are clarified. Reviewed-by: Alex Williamson Link: https://lore.kernel.org/qemu-devel/20250206131438.1505542-7-clg@redhat.com Signed-off-by: Cédric Le Goater --- backends/iommufd.c | 3 --- hw/vfio/container.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/backends/iommufd.c b/backends/iommufd.c index 7b4fc8ec46..d57da44755 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -167,8 +167,6 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, /* TODO: Not support mapping hardware PCI BAR region for now. */ if (errno == EFAULT) { warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?"); - } else { - error_report("IOMMU_IOAS_MAP failed: %m"); } } return ret; @@ -203,7 +201,6 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, if (ret) { ret = -errno; - error_report("IOMMU_IOAS_UNMAP failed: %m"); } return ret; } diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 4ebb526808..7c57bdd27b 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -159,7 +159,6 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, unmap.size -= 1ULL << ctz64(bcontainer->pgsizes); continue; } - error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); return -errno; } @@ -204,7 +203,6 @@ static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, return 0; } - error_report("VFIO_MAP_DMA failed: %s", strerror(errno)); return -errno; } From be7d8579eb5758c0edf81eb068017a56471a77e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 6 Feb 2025 14:14:35 +0100 Subject: [PATCH 12/12] vfio: Remove superfluous error report in vfio_listener_region_add() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For pseries machines, commit 567b5b309abe ("vfio/pci: Relax DMA map errors for MMIO regions") introduced 2 error reports to notify the user of MMIO mapping errors. Consolidate both code paths under one. Cc: Harsh Prateek Bora Cc: Shivaprasad G Bhat Reviewed-by: Harsh Prateek Bora Reviewed-by: Alex Williamson Link: https://lore.kernel.org/qemu-devel/20250206131438.1505542-8-clg@redhat.com Signed-off-by: Cédric Le Goater --- hw/vfio/common.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 4fca4c6166..abbdc56b6d 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -594,8 +594,9 @@ static void vfio_listener_region_add(MemoryListener *listener, return; } + /* PPC64/pseries machine only */ if (!vfio_container_add_section_window(bcontainer, section, &err)) { - goto fail; + goto mmio_dma_error; } memory_region_ref(section->mr); @@ -680,6 +681,7 @@ static void vfio_listener_region_add(MemoryListener *listener, "0x%"HWADDR_PRIx", %p) = %d (%s)", bcontainer, iova, int128_get64(llsize), vaddr, ret, strerror(-ret)); + mmio_dma_error: if (memory_region_is_ram_device(section->mr)) { /* Allow unexpected mappings not to be fatal for RAM devices */ VFIODevice *vbasedev = @@ -694,11 +696,6 @@ static void vfio_listener_region_add(MemoryListener *listener, return; fail: - if (memory_region_is_ram_device(section->mr)) { - error_reportf_err(err, "PCI p2p may not work: "); - return; - } - if (!bcontainer->initialized) { /* * At machine init time or when the device is attached to the @@ -806,6 +803,7 @@ static void vfio_listener_region_del(MemoryListener *listener, memory_region_unref(section->mr); + /* PPC64/pseries machine only */ vfio_container_del_section_window(bcontainer, section); }