From 1f94b21801b22b01ba131e7d776f07c1062d9433 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:27 +0800 Subject: [PATCH 01/42] backends: Introduce HostIOMMUDevice abstract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A HostIOMMUDevice is an abstraction for an assigned device that is protected by a physical IOMMU (aka host IOMMU). The userspace interaction with this physical IOMMU can be done either through the VFIO IOMMU type 1 legacy backend or the new iommufd backend. The assigned device can be a VFIO device or a VDPA device. The HostIOMMUDevice is needed to interact with the host IOMMU that protects the assigned device. It is especially useful when the device is also protected by a virtual IOMMU as the latter uses the translation services of the physical IOMMU and is constrained by it. In that context the HostIOMMUDevice can be passed to the virtual IOMMU to collect physical IOMMU capabilities such as the supported address width. In the future, the virtual IOMMU will use the HostIOMMUDevice to program the guest page tables in the first translation stage of the physical IOMMU. Introduce .realize() to initialize HostIOMMUDevice further after instance init. Suggested-by: Cédric Le Goater Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. 
Tsirkin --- MAINTAINERS | 2 ++ backends/host_iommu_device.c | 33 +++++++++++++++++++ backends/meson.build | 1 + include/sysemu/host_iommu_device.h | 53 ++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) create mode 100644 backends/host_iommu_device.c create mode 100644 include/sysemu/host_iommu_device.h diff --git a/MAINTAINERS b/MAINTAINERS index f144b5af44..19f67dc5d2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2198,6 +2198,8 @@ M: Zhenzhong Duan S: Supported F: backends/iommufd.c F: include/sysemu/iommufd.h +F: backends/host_iommu_device.c +F: include/sysemu/host_iommu_device.h F: include/qemu/chardev_open.h F: util/chardev_open.c F: docs/devel/vfio-iommufd.rst diff --git a/backends/host_iommu_device.c b/backends/host_iommu_device.c new file mode 100644 index 0000000000..8f2dda1beb --- /dev/null +++ b/backends/host_iommu_device.c @@ -0,0 +1,33 @@ +/* + * Host IOMMU device abstract + * + * Copyright (C) 2024 Intel Corporation. + * + * Authors: Zhenzhong Duan + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "sysemu/host_iommu_device.h" + +OBJECT_DEFINE_ABSTRACT_TYPE(HostIOMMUDevice, + host_iommu_device, + HOST_IOMMU_DEVICE, + OBJECT) + +static void host_iommu_device_class_init(ObjectClass *oc, void *data) +{ +} + +static void host_iommu_device_init(Object *obj) +{ +} + +static void host_iommu_device_finalize(Object *obj) +{ + HostIOMMUDevice *hiod = HOST_IOMMU_DEVICE(obj); + + g_free(hiod->name); +} diff --git a/backends/meson.build b/backends/meson.build index 8b2b111497..106312f0c8 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -16,6 +16,7 @@ if host_os != 'windows' endif if host_os == 'linux' system_ss.add(files('hostmem-memfd.c')) + system_ss.add(files('host_iommu_device.c')) endif if keyutils.found() system_ss.add(keyutils, files('cryptodev-lkcf.c')) diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h new file mode 100644 index 0000000000..db47a16189 --- /dev/null +++ b/include/sysemu/host_iommu_device.h @@ -0,0 +1,53 @@ +/* + * Host IOMMU device abstract declaration + * + * Copyright (C) 2024 Intel Corporation. + * + * Authors: Zhenzhong Duan + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef HOST_IOMMU_DEVICE_H +#define HOST_IOMMU_DEVICE_H + +#include "qom/object.h" +#include "qapi/error.h" + +#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" +OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) + +struct HostIOMMUDevice { + Object parent_obj; + + char *name; +}; + +/** + * struct HostIOMMUDeviceClass - The base class for all host IOMMU devices. + * + * Different types of host devices (e.g., VFIO or VDPA device) or devices + * with different backend (e.g., VFIO legacy container or IOMMUFD backend) + * will have different implementations of the HostIOMMUDeviceClass. 
+ */ +struct HostIOMMUDeviceClass { + ObjectClass parent_class; + + /** + * @realize: initialize host IOMMU device instance further. + * + * Mandatory callback. + * + * @hiod: pointer to a host IOMMU device instance. + * + * @opaque: pointer to agent device of this host IOMMU device, + * e.g., VFIO base device or VDPA device. + * + * @errp: pass an Error out when realize fails. + * + * Returns: true on success, false on failure. + */ + bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp); +}; +#endif From 38998c79a1831e23ffed4b8b6f76222c1fedb9d5 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:28 +0800 Subject: [PATCH 02/42] backends/host_iommu_device: Introduce HostIOMMUDeviceCaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HostIOMMUDeviceCaps's elements map to the host IOMMU's capabilities. Different platform IOMMUs can support different elements. Currently there are only two elements, type and aw_bits: type hints the host platform IOMMU type, e.g., Intel VT-d, ARM SMMU, etc.; aw_bits hints the host IOMMU address width. Introduce .get_cap() handler to check if HOST_IOMMU_DEVICE_CAP_XXX is supported. Suggested-by: Cédric Le Goater Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. Tsirkin --- include/sysemu/host_iommu_device.h | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h index db47a16189..a57873958b 100644 --- a/include/sysemu/host_iommu_device.h +++ b/include/sysemu/host_iommu_device.h @@ -15,6 +15,18 @@ #include "qom/object.h" #include "qapi/error.h" +/** + * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. + * + * @type: host platform IOMMU type. + * + * @aw_bits: host IOMMU address width. 0xff if no limitation. 
+ */ +typedef struct HostIOMMUDeviceCaps { + uint32_t type; + uint8_t aw_bits; +} HostIOMMUDeviceCaps; + #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) @@ -22,6 +34,7 @@ struct HostIOMMUDevice { Object parent_obj; char *name; + HostIOMMUDeviceCaps caps; }; /** @@ -49,5 +62,30 @@ struct HostIOMMUDeviceClass { * Returns: true on success, false on failure. */ bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp); + /** + * @get_cap: check if a host IOMMU device capability is supported. + * + * Optional callback, if not implemented, hint not supporting query + * of @cap. + * + * @hiod: pointer to a host IOMMU device instance. + * + * @cap: capability to check. + * + * @errp: pass an Error out when fails to query capability. + * + * Returns: <0 on failure, 0 if a @cap is unsupported, or else + * 1 or some positive value for some special @cap, + * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS. + */ + int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp); }; + +/* + * Host IOMMU device capability list. + */ +#define HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE 0 +#define HOST_IOMMU_DEVICE_CAP_AW_BITS 1 + +#define HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX 64 #endif From 0533739ecefbd3b820d32b576ad10dc6b0d56c29 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:29 +0800 Subject: [PATCH 03/42] vfio/container: Introduce TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO represents a host IOMMU device under VFIO legacy container backend. It will have its own realize implementation. Suggested-by: Eric Auger Suggested-by: Cédric Le Goater Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. 
Tsirkin --- hw/vfio/container.c | 5 ++++- include/hw/vfio/vfio-common.h | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 096cc97258..c4fca2dfca 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -1141,7 +1141,10 @@ static const TypeInfo types[] = { .name = TYPE_VFIO_IOMMU_LEGACY, .parent = TYPE_VFIO_IOMMU, .class_init = vfio_iommu_legacy_class_init, - }, + }, { + .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO, + .parent = TYPE_HOST_IOMMU_DEVICE, + } }; DEFINE_TYPES(types) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 4cb1ab8645..75b167979a 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -31,6 +31,7 @@ #endif #include "sysemu/sysemu.h" #include "hw/vfio/vfio-container-base.h" +#include "sysemu/host_iommu_device.h" #define VFIO_MSG_PREFIX "vfio %s: " @@ -171,6 +172,8 @@ typedef struct VFIOGroup { bool ram_block_discard_allowed; } VFIOGroup; +#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" + typedef struct VFIODMABuf { QemuDmaBuf *buf; uint32_t pos_x, pos_y, pos_updates; From 9005f928447841ed253e000d5e8220e381872cb0 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:30 +0800 Subject: [PATCH 04/42] backends/iommufd: Introduce TYPE_HOST_IOMMU_DEVICE_IOMMUFD[_VFIO] devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TYPE_HOST_IOMMU_DEVICE_IOMMUFD represents a host IOMMU device under iommufd backend. It is abstract, because it is going to be derived into VFIO or VDPA type'd device. It will have its own .get_cap() implementation. TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO is a sub-class of TYPE_HOST_IOMMU_DEVICE_IOMMUFD, represents a VFIO type'd host IOMMU device under iommufd backend. It will be created during VFIO device attaching and passed to vIOMMU. It will have its own .realize() implementation. 
Opportunistically, add missed header to include/sysemu/iommufd.h. Suggested-by: Cédric Le Goater Signed-off-by: Yi Liu Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. Tsirkin --- backends/iommufd.c | 35 ++++++++++++++++++----------------- hw/vfio/iommufd.c | 5 ++++- include/hw/vfio/vfio-common.h | 3 +++ include/sysemu/iommufd.h | 16 ++++++++++++++++ 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/backends/iommufd.c b/backends/iommufd.c index c506afbdac..012f18d8d8 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -208,23 +208,24 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, return ret; } -static const TypeInfo iommufd_backend_info = { - .name = TYPE_IOMMUFD_BACKEND, - .parent = TYPE_OBJECT, - .instance_size = sizeof(IOMMUFDBackend), - .instance_init = iommufd_backend_init, - .instance_finalize = iommufd_backend_finalize, - .class_size = sizeof(IOMMUFDBackendClass), - .class_init = iommufd_backend_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_USER_CREATABLE }, - { } +static const TypeInfo types[] = { + { + .name = TYPE_IOMMUFD_BACKEND, + .parent = TYPE_OBJECT, + .instance_size = sizeof(IOMMUFDBackend), + .instance_init = iommufd_backend_init, + .instance_finalize = iommufd_backend_finalize, + .class_size = sizeof(IOMMUFDBackendClass), + .class_init = iommufd_backend_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } + }, { + .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, + .parent = TYPE_HOST_IOMMU_DEVICE, + .abstract = true, } }; -static void register_types(void) -{ - type_register_static(&iommufd_backend_info); -} - -type_init(register_types); +DEFINE_TYPES(types) diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 554f9a6292..e4a507d55c 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -624,7 +624,10 @@ static const TypeInfo types[] = { .name = TYPE_VFIO_IOMMU_IOMMUFD, .parent = TYPE_VFIO_IOMMU, .class_init = 
vfio_iommu_iommufd_class_init, - }, + }, { + .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, + .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, + } }; DEFINE_TYPES(types) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 75b167979a..56d1717211 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -32,6 +32,7 @@ #include "sysemu/sysemu.h" #include "hw/vfio/vfio-container-base.h" #include "sysemu/host_iommu_device.h" +#include "sysemu/iommufd.h" #define VFIO_MSG_PREFIX "vfio %s: " @@ -173,6 +174,8 @@ typedef struct VFIOGroup { } VFIOGroup; #define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" +#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \ + TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio" typedef struct VFIODMABuf { QemuDmaBuf *buf; diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h index 293bfbe967..f6e6d6e1f9 100644 --- a/include/sysemu/iommufd.h +++ b/include/sysemu/iommufd.h @@ -1,9 +1,23 @@ +/* + * iommufd container backend declaration + * + * Copyright (C) 2024 Intel Corporation. + * Copyright Red Hat, Inc. 
2024 + * + * Authors: Yi Liu + * Eric Auger + * Zhenzhong Duan + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + #ifndef SYSEMU_IOMMUFD_H #define SYSEMU_IOMMUFD_H #include "qom/object.h" #include "exec/hwaddr.h" #include "exec/cpu-common.h" +#include "sysemu/host_iommu_device.h" #define TYPE_IOMMUFD_BACKEND "iommufd" OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) @@ -33,4 +47,6 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly); int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size); + +#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" #endif From 6f274444c579305d14d355bab24af31ea2bef224 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:31 +0800 Subject: [PATCH 05/42] range: Introduce range_get_last_bit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helper get the highest 1 bit position of the upper bound. If the range is empty or upper bound is zero, -1 is returned. Suggested-by: Cédric Le Goater Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. Tsirkin --- include/qemu/range.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/qemu/range.h b/include/qemu/range.h index 205e1da76d..4ce694a398 100644 --- a/include/qemu/range.h +++ b/include/qemu/range.h @@ -20,6 +20,8 @@ #ifndef QEMU_RANGE_H #define QEMU_RANGE_H +#include "qemu/bitops.h" + /* * Operations on 64 bit address ranges. 
* Notes: @@ -217,6 +219,15 @@ static inline int ranges_overlap(uint64_t first1, uint64_t len1, return !(last2 < first1 || last1 < first2); } +/* Get highest non-zero bit position of a range */ +static inline int range_get_last_bit(Range *range) +{ + if (range_is_empty(range)) { + return -1; + } + return 63 - clz64(range->upb); +} + /* * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap. * Both @a and @b must not be empty. From d441e05e26033eb0e49c4185293424a480ef750f Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:32 +0800 Subject: [PATCH 06/42] vfio/container: Implement HostIOMMUDeviceClass::realize() handler The realize function populates the capabilities. For now only the aw_bits caps is computed for legacy backend. Introduce a helper function vfio_device_get_aw_bits() which calls range_get_last_bit() to get host aw_bits and package it in HostIOMMUDeviceCaps for query with .get_cap(). This helper will also be used by iommufd backend. Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. 
Tsirkin --- hw/vfio/container.c | 19 +++++++++++++++++++ hw/vfio/helpers.c | 17 +++++++++++++++++ include/hw/vfio/vfio-common.h | 1 + 3 files changed, 37 insertions(+) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index c4fca2dfca..2f62c13214 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -1136,6 +1136,24 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; }; +static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + Error **errp) +{ + VFIODevice *vdev = opaque; + + hiod->name = g_strdup(vdev->name); + hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev); + + return true; +} + +static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data) +{ + HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); + + hioc->realize = hiod_legacy_vfio_realize; +}; + static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_LEGACY, @@ -1144,6 +1162,7 @@ static const TypeInfo types[] = { }, { .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO, .parent = TYPE_HOST_IOMMU_DEVICE, + .class_init = hiod_legacy_vfio_class_init, } }; diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 27ea26aa48..b14edd46ed 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -658,3 +658,20 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, vbasedev->ram_block_discard_allowed = ram_discard; } + +int vfio_device_get_aw_bits(VFIODevice *vdev) +{ + /* + * iova_ranges is a sorted list. For old kernels that support + * VFIO but not support query of iova ranges, iova_ranges is NULL, + * in this case HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX(64) is returned. 
+ */ + GList *l = g_list_last(vdev->bcontainer->iova_ranges); + + if (l) { + Range *range = l->data; + return range_get_last_bit(range) + 1; + } + + return HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX; +} diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 56d1717211..105b8b7e80 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -289,4 +289,5 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp); void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard); +int vfio_device_get_aw_bits(VFIODevice *vdev); #endif /* HW_VFIO_VFIO_COMMON_H */ From 42965386ea21fe375a5a2a6d85261663576d86d9 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:33 +0800 Subject: [PATCH 07/42] backends/iommufd: Introduce helper function iommufd_backend_get_device_info() Introduce a helper function iommufd_backend_get_device_info() to get host IOMMU related information through iommufd uAPI. Signed-off-by: Yi Liu Signed-off-by: Yi Sun Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. 
Tsirkin --- backends/iommufd.c | 22 ++++++++++++++++++++++ include/sysemu/iommufd.h | 3 +++ 2 files changed, 25 insertions(+) diff --git a/backends/iommufd.c b/backends/iommufd.c index 012f18d8d8..c7e969d6f7 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -208,6 +208,28 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, return ret; } +bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + Error **errp) +{ + struct iommu_hw_info info = { + .size = sizeof(info), + .dev_id = devid, + .data_len = len, + .data_uptr = (uintptr_t)data, + }; + + if (ioctl(be->fd, IOMMU_GET_HW_INFO, &info)) { + error_setg_errno(errp, errno, "Failed to get hardware info"); + return false; + } + + g_assert(type); + *type = info.out_data_type; + + return true; +} + static const TypeInfo types[] = { { .name = TYPE_IOMMUFD_BACKEND, diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h index f6e6d6e1f9..9edfec6045 100644 --- a/include/sysemu/iommufd.h +++ b/include/sysemu/iommufd.h @@ -47,6 +47,9 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly); int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size); +bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, + uint32_t *type, void *data, uint32_t len, + Error **errp); #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" #endif From 930589520128a5f25e65f9f923ac8dc6fac32ff8 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:34 +0800 Subject: [PATCH 08/42] vfio/iommufd: Implement HostIOMMUDeviceClass::realize() handler It calls iommufd_backend_get_device_info() to get host IOMMU related information and translate it into HostIOMMUDeviceCaps for query with .get_cap(). 
For aw_bits, use the same way as legacy backend by calling vfio_device_get_aw_bits() which is common for different vendor IOMMU. Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. Tsirkin --- hw/vfio/iommufd.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index e4a507d55c..1674c61227 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -619,6 +619,35 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; }; +static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, + Error **errp) +{ + VFIODevice *vdev = opaque; + HostIOMMUDeviceCaps *caps = &hiod->caps; + enum iommu_hw_info_type type; + union { + struct iommu_hw_info_vtd vtd; + } data; + + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, + &type, &data, sizeof(data), errp)) { + return false; + } + + hiod->name = g_strdup(vdev->name); + caps->type = type; + caps->aw_bits = vfio_device_get_aw_bits(vdev); + + return true; +} + +static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data) +{ + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); + + hiodc->realize = hiod_iommufd_vfio_realize; +}; + static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_IOMMUFD, @@ -627,6 +656,7 @@ static const TypeInfo types[] = { }, { .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, + .class_init = hiod_iommufd_vfio_class_init, } }; From ed92ed2d486e5fddacb1e611c57e976eb160d0a5 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:35 +0800 Subject: [PATCH 09/42] vfio/container: Implement HostIOMMUDeviceClass::get_cap() handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suggested-by: Cédric Le Goater Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. 
Tsirkin --- hw/vfio/container.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 2f62c13214..99beeba422 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -1147,11 +1147,26 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, return true; } +static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap, + Error **errp) +{ + HostIOMMUDeviceCaps *caps = &hiod->caps; + + switch (cap) { + case HOST_IOMMU_DEVICE_CAP_AW_BITS: + return caps->aw_bits; + default: + error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); + return -EINVAL; + } +} + static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data) { HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); hioc->realize = hiod_legacy_vfio_realize; + hioc->get_cap = hiod_legacy_vfio_get_cap; }; static const TypeInfo types[] = { From 63c6e83ec236f5de07444eac1d0bb747c506cc90 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:36 +0800 Subject: [PATCH 10/42] backends/iommufd: Implement HostIOMMUDeviceClass::get_cap() handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suggested-by: Cédric Le Goater Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. 
Tsirkin --- backends/iommufd.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/backends/iommufd.c b/backends/iommufd.c index c7e969d6f7..84fefbc9ee 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -230,6 +230,28 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, return true; } +static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) +{ + HostIOMMUDeviceCaps *caps = &hiod->caps; + + switch (cap) { + case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE: + return caps->type; + case HOST_IOMMU_DEVICE_CAP_AW_BITS: + return caps->aw_bits; + default: + error_setg(errp, "%s: unsupported capability %x", hiod->name, cap); + return -EINVAL; + } +} + +static void hiod_iommufd_class_init(ObjectClass *oc, void *data) +{ + HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); + + hioc->get_cap = hiod_iommufd_get_cap; +}; + static const TypeInfo types[] = { { .name = TYPE_IOMMUFD_BACKEND, @@ -246,6 +268,7 @@ static const TypeInfo types[] = { }, { .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, .parent = TYPE_HOST_IOMMU_DEVICE, + .class_init = hiod_iommufd_class_init, .abstract = true, } }; From a7fd91b876fe788c7401120c8eefe5c91b6f17c3 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:37 +0800 Subject: [PATCH 11/42] vfio: Create host IOMMU device instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create host IOMMU device instance in vfio_attach_device() and call .realize() to initialize it further. Introduce attribute VFIOIOMMUClass::hiod_typename and initialize it based on VFIO backend type. It will facilitate HostIOMMUDevice creation in vfio_attach_device(). Suggested-by: Cédric Le Goater Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. 
Tsirkin --- hw/vfio/common.c | 16 +++++++++++++++- hw/vfio/container.c | 2 ++ hw/vfio/iommufd.c | 2 ++ include/hw/vfio/vfio-common.h | 1 + include/hw/vfio/vfio-container-base.h | 3 +++ 5 files changed, 23 insertions(+), 1 deletion(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index f9619a1dfb..f20a7b5bba 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1528,6 +1528,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev, { const VFIOIOMMUClass *ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); + HostIOMMUDevice *hiod; if (vbasedev->iommufd) { ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); @@ -1535,7 +1536,19 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev, assert(ops); - return ops->attach_device(name, vbasedev, as, errp); + if (!ops->attach_device(name, vbasedev, as, errp)) { + return false; + } + + hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename)); + if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) { + object_unref(hiod); + ops->detach_device(vbasedev); + return false; + } + vbasedev->hiod = hiod; + + return true; } void vfio_detach_device(VFIODevice *vbasedev) @@ -1543,5 +1556,6 @@ void vfio_detach_device(VFIODevice *vbasedev) if (!vbasedev->bcontainer) { return; } + object_unref(vbasedev->hiod); vbasedev->bcontainer->ops->detach_device(vbasedev); } diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 99beeba422..26e6f7fb4f 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -1126,6 +1126,8 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) { VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO; + vioc->setup = vfio_legacy_setup; vioc->dma_map = vfio_legacy_dma_map; vioc->dma_unmap = vfio_legacy_dma_unmap; diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 1674c61227..409ed3dcc9 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -612,6 +612,8 @@ 
static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) { VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO; + vioc->dma_map = iommufd_cdev_map; vioc->dma_unmap = iommufd_cdev_unmap; vioc->attach_device = iommufd_cdev_attach; diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 105b8b7e80..776de8064f 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -127,6 +127,7 @@ typedef struct VFIODevice { OnOffAuto pre_copy_dirty_page_tracking; bool dirty_pages_supported; bool dirty_tracking; + HostIOMMUDevice *hiod; int devid; IOMMUFDBackend *iommufd; } VFIODevice; diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 2776481fc9..442c0dfc4c 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -109,6 +109,9 @@ DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU) struct VFIOIOMMUClass { InterfaceClass parent_class; + /* Properties */ + const char *hiod_typename; + /* basic feature */ bool (*setup)(VFIOContainerBase *bcontainer, Error **errp); int (*dma_map)(const VFIOContainerBase *bcontainer, From 6c8ed5fea1ff60167736d530e39f3903a826df20 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:38 +0800 Subject: [PATCH 12/42] hw/pci: Introduce helper function pci_device_get_iommu_bus_devfn() Extract out pci_device_get_iommu_bus_devfn() from pci_device_iommu_address_space() to facilitate implementation of pci_device_[set|unset]_iommu_device() in following patch. No functional change intended. Signed-off-by: Yi Liu Signed-off-by: Yi Sun Signed-off-by: Nicolin Chen Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. 
Tsirkin --- hw/pci/pci.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 324c1302d2..02a4bb2af6 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2648,11 +2648,27 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data) } } -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) +/* + * Get IOMMU root bus, aliased bus and devfn of a PCI device + * + * IOMMU root bus is needed by all call sites to call into iommu_ops. + * For call sites which don't need aliased BDF, passing NULL to + * aliased_[bus|devfn] is allowed. + * + * @piommu_bus: return root #PCIBus backed by an IOMMU for the PCI device. + * + * @aliased_bus: return aliased #PCIBus of the PCI device, optional. + * + * @aliased_devfn: return aliased devfn of the PCI device, optional. + */ +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, + PCIBus **piommu_bus, + PCIBus **aliased_bus, + int *aliased_devfn) { PCIBus *bus = pci_get_bus(dev); PCIBus *iommu_bus = bus; - uint8_t devfn = dev->devfn; + int devfn = dev->devfn; while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) { PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev); @@ -2693,7 +2709,33 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) iommu_bus = parent_bus; } - if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { + + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); + assert(iommu_bus); + + if (pci_bus_bypass_iommu(bus) || !iommu_bus->iommu_ops) { + iommu_bus = NULL; + } + + *piommu_bus = iommu_bus; + + if (aliased_bus) { + *aliased_bus = bus; + } + + if (aliased_devfn) { + *aliased_devfn = devfn; + } +} + +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn); + if (iommu_bus) { return iommu_bus->iommu_ops->get_address_space(bus, iommu_bus->iommu_opaque, devfn); 
} From b025ea6886fbea80f0c3ab7e54b4cfa7040ab8bf Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 5 Jun 2024 16:30:39 +0800 Subject: [PATCH 13/42] hw/pci: Introduce pci_device_[set|unset]_iommu_device() pci_device_[set|unset]_iommu_device() call pci_device_get_iommu_bus_devfn() to get iommu_bus->iommu_ops and call [set|unset]_iommu_device callback to set/unset HostIOMMUDevice for a given PCI device. Signed-off-by: Yi Liu Signed-off-by: Yi Sun Signed-off-by: Nicolin Chen Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. Tsirkin --- hw/pci/pci.c | 27 +++++++++++++++++++++++++++ include/hw/pci/pci.h | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 02a4bb2af6..c8a8aab306 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2742,6 +2742,33 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) return &address_space_memory; } +bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, + Error **errp) +{ + PCIBus *iommu_bus; + + /* set_iommu_device requires device's direct BDF instead of aliased BDF */ + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL); + if (iommu_bus && iommu_bus->iommu_ops->set_iommu_device) { + return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev), + iommu_bus->iommu_opaque, + dev->devfn, hiod, errp); + } + return true; +} + +void pci_device_unset_iommu_device(PCIDevice *dev) +{ + PCIBus *iommu_bus; + + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL); + if (iommu_bus && iommu_bus->iommu_ops->unset_iommu_device) { + return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev), + iommu_bus->iommu_opaque, + dev->devfn); + } +} + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { /* diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index eaa3fc99d8..eb26cac810 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -3,6 +3,7 @@ #include 
"exec/memory.h" #include "sysemu/dma.h" +#include "sysemu/host_iommu_device.h" /* PCI includes legacy ISA access. */ #include "hw/isa/isa.h" @@ -383,10 +384,45 @@ typedef struct PCIIOMMUOps { * * @devfn: device and function number */ - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); + /** + * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU + * + * Optional callback, if not implemented in vIOMMU, then vIOMMU can't + * retrieve host information from the associated HostIOMMUDevice. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @dev: the #HostIOMMUDevice to attach. + * + * @errp: pass an Error out only when return false + * + * Returns: true if HostIOMMUDevice is attached or else false with errp set. + */ + bool (*set_iommu_device)(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *dev, Error **errp); + /** + * @unset_iommu_device: detach a HostIOMMUDevice from a vIOMMU + * + * Optional callback. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + */ + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn); } PCIIOMMUOps; AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); +bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, + Error **errp); +void pci_device_unset_iommu_device(PCIDevice *dev); /** * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus From ee26474daa127d7ebfb5e3dd6633361decda9e08 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:40 +0800 Subject: [PATCH 14/42] vfio/pci: Pass HostIOMMUDevice to vIOMMU With HostIOMMUDevice passed, vIOMMU can check compatibility with host IOMMU, call into IOMMUFD specific methods, etc. 
Originally-by: Yi Liu Signed-off-by: Nicolin Chen Signed-off-by: Yi Sun Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. Tsirkin --- hw/vfio/pci.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 74a79bdf61..d8a76c1ee0 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3121,10 +3121,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bars_register(vdev); - if (!vfio_add_capabilities(vdev, errp)) { + if (!pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) { + error_prepend(errp, "Failed to set iommu_device: "); goto out_teardown; } + if (!vfio_add_capabilities(vdev, errp)) { + goto out_unset_idev; + } + if (vdev->vga) { vfio_vga_quirk_setup(vdev); } @@ -3141,7 +3146,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) error_setg(errp, "cannot support IGD OpRegion feature on hotplugged " "device"); - goto out_teardown; + goto out_unset_idev; } ret = vfio_get_dev_region_info(vbasedev, @@ -3150,11 +3155,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (ret) { error_setg_errno(errp, -ret, "does not support requested IGD OpRegion feature"); - goto out_teardown; + goto out_unset_idev; } if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { - goto out_teardown; + goto out_unset_idev; } } @@ -3238,6 +3243,8 @@ out_deregister: if (vdev->intx.mmap_timer) { timer_free(vdev->intx.mmap_timer); } +out_unset_idev: + pci_device_unset_iommu_device(pdev); out_teardown: vfio_teardown_msi(vdev); vfio_bars_exit(vdev); @@ -3266,6 +3273,7 @@ static void vfio_instance_finalize(Object *obj) static void vfio_exitfn(PCIDevice *pdev) { VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; vfio_unregister_req_notifier(vdev); vfio_unregister_err_notifier(vdev); @@ -3280,7 +3288,8 @@ static void vfio_exitfn(PCIDevice *pdev) vfio_teardown_msi(vdev); vfio_pci_disable_rp_atomics(vdev); vfio_bars_exit(vdev); - 
vfio_migration_exit(&vdev->vbasedev); + vfio_migration_exit(vbasedev); + pci_device_unset_iommu_device(pdev); } static void vfio_pci_reset(DeviceState *dev) From d5fd978d918516b7bc4224de432c7ef93ec089a3 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:41 +0800 Subject: [PATCH 15/42] intel_iommu: Extract out vtd_cap_init() to initialize cap/ecap Extract cap/ecap initialization in vtd_cap_init() to make code cleaner. No functional change intended. Reviewed-by: Eric Auger Signed-off-by: Zhenzhong Duan Reviewed-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 93 ++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 42 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index c4350e0ff0..c69c0d285b 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3934,30 +3934,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) return; } -/* Do the initialization. It will also be called when reset, so pay - * attention when adding new initialization stuff. 
- */ -static void vtd_init(IntelIOMMUState *s) +static void vtd_cap_init(IntelIOMMUState *s) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); - memset(s->csr, 0, DMAR_REG_SIZE); - memset(s->wmask, 0, DMAR_REG_SIZE); - memset(s->w1cmask, 0, DMAR_REG_SIZE); - memset(s->womask, 0, DMAR_REG_SIZE); - - s->root = 0; - s->root_scalable = false; - s->dmar_enabled = false; - s->intr_enabled = false; - s->iq_head = 0; - s->iq_tail = 0; - s->iq = 0; - s->iq_size = 0; - s->qi_enabled = false; - s->iq_last_desc_type = VTD_INV_DESC_NONE; - s->iq_dw = false; - s->next_frcd_reg = 0; s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | VTD_CAP_MGAW(s->aw_bits); @@ -3974,27 +3954,6 @@ static void vtd_init(IntelIOMMUState *s) } s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; - /* - * Rsvd field masks for spte - */ - vtd_spte_rsvd[0] = ~0ULL; - vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); - vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); - vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); - vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); - - vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); - vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, - x86_iommu->dt_supported); - - if (s->scalable_mode || s->snoop_control) { - vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; - vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; - vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; - } - if (x86_iommu_ir_supported(x86_iommu)) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { @@ -4027,6 +3986,56 @@ static void vtd_init(IntelIOMMUState *s) if (s->pasid) { s->ecap |= VTD_ECAP_PASID; } +} + +/* + * Do the initialization. It will also be called when reset, so pay + * attention when adding new initialization stuff. 
+ */ +static void vtd_init(IntelIOMMUState *s) +{ + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + + memset(s->csr, 0, DMAR_REG_SIZE); + memset(s->wmask, 0, DMAR_REG_SIZE); + memset(s->w1cmask, 0, DMAR_REG_SIZE); + memset(s->womask, 0, DMAR_REG_SIZE); + + s->root = 0; + s->root_scalable = false; + s->dmar_enabled = false; + s->intr_enabled = false; + s->iq_head = 0; + s->iq_tail = 0; + s->iq = 0; + s->iq_size = 0; + s->qi_enabled = false; + s->iq_last_desc_type = VTD_INV_DESC_NONE; + s->iq_dw = false; + s->next_frcd_reg = 0; + + vtd_cap_init(s); + + /* + * Rsvd field masks for spte + */ + vtd_spte_rsvd[0] = ~0ULL; + vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits, + x86_iommu->dt_supported); + vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); + vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); + vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); + + vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits, + x86_iommu->dt_supported); + vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, + x86_iommu->dt_supported); + + if (s->scalable_mode || s->snoop_control) { + vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; + } vtd_reset_caches(s); From a20910ca3e283b10585c71061746ea6b1fa6ca91 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Wed, 5 Jun 2024 16:30:42 +0800 Subject: [PATCH 16/42] intel_iommu: Implement [set|unset]_iommu_device() callbacks Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU. In set call, we take a reference of HostIOMMUDevice and store it in hash table indexed by PCI BDF. Note this BDF index is device's real BDF not the aliased one which is different from the index of VTDAddressSpace. There can be multiple assigned devices under same virtual iommu group and share same VTDAddressSpace, but each has its own HostIOMMUDevice. 
Signed-off-by: Yi Liu Signed-off-by: Yi Sun Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 81 +++++++++++++++++++++++++++++++++++ include/hw/i386/intel_iommu.h | 2 + 2 files changed, 83 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index c69c0d285b..019d1c9c80 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -61,6 +61,12 @@ struct vtd_as_key { uint32_t pasid; }; +/* bus/devfn is PCI device's real BDF not the aliased one */ +struct vtd_hiod_key { + PCIBus *bus; + uint8_t devfn; +}; + struct vtd_iotlb_key { uint64_t gfn; uint32_t pasid; @@ -250,6 +256,25 @@ static guint vtd_as_hash(gconstpointer v) return (guint)(value << 8 | key->devfn); } +/* Same implementation as vtd_as_hash() */ +static guint vtd_hiod_hash(gconstpointer v) +{ + return vtd_as_hash(v); +} + +static gboolean vtd_hiod_equal(gconstpointer v1, gconstpointer v2) +{ + const struct vtd_hiod_key *key1 = v1; + const struct vtd_hiod_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); +} + +static void vtd_hiod_destroy(gpointer v) +{ + object_unref(v); +} + static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, gpointer user_data) { @@ -3812,6 +3837,58 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, return vtd_dev_as; } +static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *hiod, Error **errp) +{ + IntelIOMMUState *s = opaque; + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + }; + struct vtd_as_key *new_key; + + assert(hiod); + + vtd_iommu_lock(s); + + if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) { + error_setg(errp, "Host IOMMU device already exist"); + vtd_iommu_unlock(s); + return false; + } + + new_key = g_malloc(sizeof(*new_key)); + new_key->bus = bus; + new_key->devfn = devfn; + + object_ref(hiod); + g_hash_table_insert(s->vtd_host_iommu_dev, new_key, hiod); + + 
vtd_iommu_unlock(s); + + return true; +} + +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) +{ + IntelIOMMUState *s = opaque; + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + }; + + vtd_iommu_lock(s); + + if (!g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) { + vtd_iommu_unlock(s); + return; + } + + g_hash_table_remove(s->vtd_host_iommu_dev, &key); + + vtd_iommu_unlock(s); +} + /* Unmap the whole range in the notifier's scope. */ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) { @@ -4116,6 +4193,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) static PCIIOMMUOps vtd_iommu_ops = { .get_address_space = vtd_host_dma_iommu, + .set_iommu_device = vtd_dev_set_iommu_device, + .unset_iommu_device = vtd_dev_unset_iommu_device, }; static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) @@ -4235,6 +4314,8 @@ static void vtd_realize(DeviceState *dev, Error **errp) g_free, g_free); s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, g_free, g_free); + s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_hiod_hash, vtd_hiod_equal, + g_free, vtd_hiod_destroy); vtd_init(s); pci_setup_iommu(bus, &vtd_iommu_ops, dev); /* Pseudo address space under root PCI bus. 
*/ diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 7fa0a695c8..1eb05c29fc 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -292,6 +292,8 @@ struct IntelIOMMUState { /* list of registered notifiers */ QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; + GHashTable *vtd_host_iommu_dev; /* HostIOMMUDevice */ + /* interrupt remapping */ bool intr_enabled; /* Whether guest enabled IR */ dma_addr_t intr_root; /* Interrupt remapping table pointer */ From 77f6efc0ab93718da2bc3f908b7ff7fffa489ea1 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Wed, 5 Jun 2024 16:30:43 +0800 Subject: [PATCH 17/42] intel_iommu: Check compatibility with host IOMMU capabilities If check fails, host device (either VFIO or VDPA device) is not compatible with current vIOMMU config and should not be passed to guest. Only aw_bits is checked for now, we don't care about other caps before scalable modern mode is introduced. Signed-off-by: Yi Liu Signed-off-by: Zhenzhong Duan Reviewed-by: Eric Auger Reviewed-by: Michael S. 
Tsirkin --- hw/i386/intel_iommu.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 019d1c9c80..37c21a0aec 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3837,6 +3837,30 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, return vtd_dev_as; } +static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod, + Error **errp) +{ + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); + int ret; + + if (!hiodc->get_cap) { + error_setg(errp, ".get_cap() not implemented"); + return false; + } + + /* Common checks */ + ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp); + if (ret < 0) { + return false; + } + if (s->aw_bits > ret) { + error_setg(errp, "aw-bits %d > host aw-bits %d", s->aw_bits, ret); + return false; + } + + return true; +} + static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, HostIOMMUDevice *hiod, Error **errp) { @@ -3857,6 +3881,11 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, return false; } + if (!vtd_check_hiod(s, hiod, errp)) { + vtd_iommu_unlock(s); + return false; + } + new_key = g_malloc(sizeof(*new_key)); new_key->bus = bus; new_key->devfn = devfn; From dc169694cad4b4c4028c755ec44ebb7565bd7461 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 14 Jun 2024 11:52:51 +0200 Subject: [PATCH 18/42] HostIOMMUDevice: Store the VFIO/VDPA agent Store the agent device (VFIO or VDPA) in the host IOMMU device. This will allow easy access to some of its resources. Signed-off-by: Eric Auger Reviewed-by: Zhenzhong Duan Reviewed-by: Michael S. 
Tsirkin --- hw/vfio/container.c | 1 + hw/vfio/iommufd.c | 2 ++ include/sysemu/host_iommu_device.h | 1 + 3 files changed, 4 insertions(+) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 26e6f7fb4f..b728b978a2 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -1145,6 +1145,7 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque, hiod->name = g_strdup(vdev->name); hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev); + hiod->agent = opaque; return true; } diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 409ed3dcc9..dbdae1adbb 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -631,6 +631,8 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, struct iommu_hw_info_vtd vtd; } data; + hiod->agent = opaque; + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type, &data, sizeof(data), errp)) { return false; diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h index a57873958b..3e5f058e7b 100644 --- a/include/sysemu/host_iommu_device.h +++ b/include/sysemu/host_iommu_device.h @@ -34,6 +34,7 @@ struct HostIOMMUDevice { Object parent_obj; char *name; + void *agent; /* pointer to agent device, ie. VFIO or VDPA device */ HostIOMMUDeviceCaps caps; }; From 817ef10da23cd9a6ee61c8f31f55b845c7a74760 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 14 Jun 2024 11:52:52 +0200 Subject: [PATCH 19/42] virtio-iommu: Implement [set|unset]_iommu_device() callbacks Implement PCIIOMMUOps [set|unset]_iommu_device() callbacks. In set(), the HostIOMMUDevice handle is stored in a hash table indexed by PCI BDF. The object will allow retrieving information related to the physical IOMMU. Signed-off-by: Eric Auger Reviewed-by: Zhenzhong Duan Reviewed-by: Michael S. 
Tsirkin --- hw/virtio/virtio-iommu.c | 82 ++++++++++++++++++++++++++++++++ include/hw/virtio/virtio-iommu.h | 2 + 2 files changed, 84 insertions(+) diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 1326c6ec41..16c8ec3ca4 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -69,6 +69,11 @@ typedef struct VirtIOIOMMUMapping { uint32_t flags; } VirtIOIOMMUMapping; +struct hiod_key { + PCIBus *bus; + uint8_t devfn; +}; + static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) { return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); @@ -462,8 +467,82 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, return &sdev->as; } +static gboolean hiod_equal(gconstpointer v1, gconstpointer v2) +{ + const struct hiod_key *key1 = v1; + const struct hiod_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); +} + +static guint hiod_hash(gconstpointer v) +{ + const struct hiod_key *key = v; + guint value = (guint)(uintptr_t)key->bus; + + return (guint)(value << 8 | key->devfn); +} + +static void hiod_destroy(gpointer v) +{ + object_unref(v); +} + +static HostIOMMUDevice * +get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn) { + struct hiod_key key = { + .bus = bus, + .devfn = devfn, + }; + + return g_hash_table_lookup(viommu->host_iommu_devices, &key); +} + +static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *hiod, Error **errp) +{ + VirtIOIOMMU *viommu = opaque; + struct hiod_key *new_key; + + assert(hiod); + + if (get_host_iommu_device(viommu, bus, devfn)) { + error_setg(errp, "Host IOMMU device already exists"); + return false; + } + + new_key = g_malloc(sizeof(*new_key)); + new_key->bus = bus; + new_key->devfn = devfn; + + object_ref(hiod); + g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod); + + return true; +} + +static void +virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn) +{ + 
VirtIOIOMMU *viommu = opaque; + HostIOMMUDevice *hiod; + struct hiod_key key = { + .bus = bus, + .devfn = devfn, + }; + + hiod = g_hash_table_lookup(viommu->host_iommu_devices, &key); + if (!hiod) { + return; + } + + g_hash_table_remove(viommu->host_iommu_devices, &key); +} + static const PCIIOMMUOps virtio_iommu_ops = { .get_address_space = virtio_iommu_find_add_as, + .set_iommu_device = virtio_iommu_set_iommu_device, + .unset_iommu_device = virtio_iommu_unset_iommu_device, }; static int virtio_iommu_attach(VirtIOIOMMU *s, @@ -1357,6 +1436,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); + s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal, + g_free, hiod_destroy); + if (s->primary_bus) { pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s); } else { diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h index 83a52cc446..bdb3da72d0 100644 --- a/include/hw/virtio/virtio-iommu.h +++ b/include/hw/virtio/virtio-iommu.h @@ -25,6 +25,7 @@ #include "hw/pci/pci.h" #include "qom/object.h" #include "qapi/qapi-types-virtio.h" +#include "sysemu/host_iommu_device.h" #define TYPE_VIRTIO_IOMMU "virtio-iommu-device" #define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-pci" @@ -57,6 +58,7 @@ struct VirtIOIOMMU { struct virtio_iommu_config config; uint64_t features; GHashTable *as_by_busptr; + GHashTable *host_iommu_devices; IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX]; PCIBus *primary_bus; ReservedRegion *prop_resv_regions; From 3ad35d9158bde0500aeaea4147345d39ad0ae1c0 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 14 Jun 2024 11:52:53 +0200 Subject: [PATCH 20/42] HostIOMMUDevice: Introduce get_iova_ranges callback Introduce a new HostIOMMUDevice callback that allows to retrieve the usable IOVA ranges. Implement this callback in the legacy VFIO and IOMMUFD VFIO host iommu devices. 
This relies on the VFIODevice agent's base container iova_ranges resource. Signed-off-by: Eric Auger Reviewed-by: Zhenzhong Duan Reviewed-by: Michael S. Tsirkin --- hw/vfio/container.c | 16 ++++++++++++++++ hw/vfio/iommufd.c | 16 ++++++++++++++++ include/sysemu/host_iommu_device.h | 8 ++++++++ 3 files changed, 40 insertions(+) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index b728b978a2..c48749c089 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -1164,12 +1164,28 @@ static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap, } } +static GList * +hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp) +{ + VFIODevice *vdev = hiod->agent; + GList *l = NULL; + + g_assert(vdev); + + if (vdev->bcontainer) { + l = g_list_copy(vdev->bcontainer->iova_ranges); + } + + return l; +} + static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data) { HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); hioc->realize = hiod_legacy_vfio_realize; hioc->get_cap = hiod_legacy_vfio_get_cap; + hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges; }; static const TypeInfo types[] = { diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index dbdae1adbb..e502081c2a 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -645,11 +645,27 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, return true; } +static GList * +hiod_iommufd_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp) +{ + VFIODevice *vdev = hiod->agent; + GList *l = NULL; + + g_assert(vdev); + + if (vdev->bcontainer) { + l = g_list_copy(vdev->bcontainer->iova_ranges); + } + + return l; +} + static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data) { HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); hiodc->realize = hiod_iommufd_vfio_realize; + hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges; }; static const TypeInfo types[] = { diff --git a/include/sysemu/host_iommu_device.h 
b/include/sysemu/host_iommu_device.h index 3e5f058e7b..40e0fa13ef 100644 --- a/include/sysemu/host_iommu_device.h +++ b/include/sysemu/host_iommu_device.h @@ -80,6 +80,14 @@ struct HostIOMMUDeviceClass { * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS. */ int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp); + /** + * @get_iova_ranges: Return the list of usable iova_ranges along with + * @hiod Host IOMMU device + * + * @hiod: handle to the host IOMMU device + * @errp: error handle + */ + GList* (*get_iova_ranges)(HostIOMMUDevice *hiod, Error **errp); }; /* From a95264191f10173e27b175ebc4a487520a523d62 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 14 Jun 2024 11:52:54 +0200 Subject: [PATCH 21/42] HostIOMMUDevice: Store the aliased bus and devfn Store the aliased bus and devfn in the HostIOMMUDevice. This will be useful to handle info that are iommu group specific and not device specific (such as reserved iova ranges). Signed-off-by: Eric Auger Reviewed-by: Zhenzhong Duan Reviewed-by: Michael S. 
Tsirkin --- hw/pci/pci.c | 8 ++++++-- include/sysemu/host_iommu_device.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index c8a8aab306..50b86d5790 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2745,11 +2745,15 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, Error **errp) { - PCIBus *iommu_bus; + PCIBus *iommu_bus, *aliased_bus; + int aliased_devfn; /* set_iommu_device requires device's direct BDF instead of aliased BDF */ - pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL); + pci_device_get_iommu_bus_devfn(dev, &iommu_bus, + &aliased_bus, &aliased_devfn); if (iommu_bus && iommu_bus->iommu_ops->set_iommu_device) { + hiod->aliased_bus = aliased_bus; + hiod->aliased_devfn = aliased_devfn; return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev), iommu_bus->iommu_opaque, dev->devfn, hiod, errp); diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h index 40e0fa13ef..ee6c813c8b 100644 --- a/include/sysemu/host_iommu_device.h +++ b/include/sysemu/host_iommu_device.h @@ -35,6 +35,8 @@ struct HostIOMMUDevice { char *name; void *agent; /* pointer to agent device, ie. VFIO or VDPA device */ + PCIBus *aliased_bus; + int aliased_devfn; HostIOMMUDeviceCaps caps; }; From cf2647a76e7854ff28c21c8fb4bfca1da603e007 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 14 Jun 2024 11:52:55 +0200 Subject: [PATCH 22/42] virtio-iommu: Compute host reserved regions Compute the host reserved regions in virtio_iommu_set_iommu_device(). The usable IOVA regions are retrieved from the HostIOMMUDevice. The virtio_iommu_set_host_iova_ranges() helper turns usable regions into complementary reserved regions while testing the inclusion into existing ones. virtio_iommu_set_host_iova_ranges() reuses the implementation of virtio_iommu_set_iova_ranges() which will be removed in subsequent patches. 
rebuild_resv_regions() is just moved. Signed-off-by: Eric Auger Reviewed-by: Zhenzhong Duan Reviewed-by: Michael S. Tsirkin --- hw/virtio/virtio-iommu.c | 147 ++++++++++++++++++++++++++++++--------- 1 file changed, 113 insertions(+), 34 deletions(-) diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 16c8ec3ca4..a4c0cceb65 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -498,11 +498,108 @@ get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn) { return g_hash_table_lookup(viommu->host_iommu_devices, &key); } +/** + * rebuild_resv_regions: rebuild resv regions with both the + * info of host resv ranges and property set resv ranges + */ +static int rebuild_resv_regions(IOMMUDevice *sdev) +{ + GList *l; + int i = 0; + + /* free the existing list and rebuild it from scratch */ + g_list_free_full(sdev->resv_regions, g_free); + sdev->resv_regions = NULL; + + /* First add host reserved regions if any, all tagged as RESERVED */ + for (l = sdev->host_resv_ranges; l; l = l->next) { + ReservedRegion *reg = g_new0(ReservedRegion, 1); + Range *r = (Range *)l->data; + + reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; + range_set_bounds(&reg->range, range_lob(r), range_upb(r)); + sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); + trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, + range_lob(&reg->range), + range_upb(&reg->range)); + i++; + } + /* + * then add higher priority reserved regions set by the machine + * through properties + */ + add_prop_resv_regions(sdev); + return 0; +} + +static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, + int devfn, GList *iova_ranges, + Error **errp) +{ + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + IOMMUDevice *sdev; + GList *current_ranges; + GList *l, *tmp, *new_ranges = NULL; + int ret = -EINVAL; + + if (!sbus) { + error_report("%s no sbus", __func__); + } + + sdev = sbus->pbdev[devfn]; + + current_ranges = 
sdev->host_resv_ranges; + + g_assert(!sdev->probe_done); + + /* check that each new resv region is included in an existing one */ + if (sdev->host_resv_ranges) { + range_inverse_array(iova_ranges, + &new_ranges, + 0, UINT64_MAX); + + for (tmp = new_ranges; tmp; tmp = tmp->next) { + Range *newr = (Range *)tmp->data; + bool included = false; + + for (l = current_ranges; l; l = l->next) { + Range * r = (Range *)l->data; + + if (range_contains_range(r, newr)) { + included = true; + break; + } + } + if (!included) { + goto error; + } + } + /* all new reserved ranges are included in existing ones */ + ret = 0; + goto out; + } + + range_inverse_array(iova_ranges, + &sdev->host_resv_ranges, + 0, UINT64_MAX); + rebuild_resv_regions(sdev); + + return 0; +error: + error_setg(errp, "%s Conflicting host reserved ranges set!", + __func__); +out: + g_list_free_full(new_ranges, g_free); + return ret; +} + static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn, HostIOMMUDevice *hiod, Error **errp) { VirtIOIOMMU *viommu = opaque; + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); struct hiod_key *new_key; + GList *host_iova_ranges = NULL; assert(hiod); @@ -511,12 +608,28 @@ static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn, return false; } + if (hiodc->get_iova_ranges) { + int ret; + host_iova_ranges = hiodc->get_iova_ranges(hiod, errp); + if (!host_iova_ranges) { + return true; /* some old kernels may not support that capability */ + } + ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus, + hiod->aliased_devfn, + host_iova_ranges, errp); + if (ret) { + g_list_free_full(host_iova_ranges, g_free); + return false; + } + } + new_key = g_malloc(sizeof(*new_key)); new_key->bus = bus; new_key->devfn = devfn; object_ref(hiod); g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod); + g_list_free_full(host_iova_ranges, g_free); return true; } @@ -1238,40 +1351,6 @@ static int 
virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, return 0; } -/** - * rebuild_resv_regions: rebuild resv regions with both the - * info of host resv ranges and property set resv ranges - */ -static int rebuild_resv_regions(IOMMUDevice *sdev) -{ - GList *l; - int i = 0; - - /* free the existing list and rebuild it from scratch */ - g_list_free_full(sdev->resv_regions, g_free); - sdev->resv_regions = NULL; - - /* First add host reserved regions if any, all tagged as RESERVED */ - for (l = sdev->host_resv_ranges; l; l = l->next) { - ReservedRegion *reg = g_new0(ReservedRegion, 1); - Range *r = (Range *)l->data; - - reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; - range_set_bounds(&reg->range, range_lob(r), range_upb(r)); - sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); - trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, - range_lob(&reg->range), - range_upb(&reg->range)); - i++; - } - /* - * then add higher priority reserved regions set by the machine - * through properties - */ - add_prop_resv_regions(sdev); - return 0; -} - /** * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges * From 3ba100b41946b10efc12c2493997f7074081707f Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 14 Jun 2024 11:52:56 +0200 Subject: [PATCH 23/42] virtio-iommu: Remove the implementation of iommu_set_iova_range Now that we use PCIIOMMUOps to convey information about usable IOVA ranges we do not need to implement the iommu_set_iova_ranges IOMMU MR callback. Signed-off-by: Eric Auger Reviewed-by: Zhenzhong Duan Reviewed-by: Michael S.
Tsirkin --- hw/virtio/virtio-iommu.c | 67 ---------------------------------------- 1 file changed, 67 deletions(-) diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index a4c0cceb65..b9a7ddcd14 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -1351,72 +1351,6 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, return 0; } -/** - * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges - * - * The function turns those into reserved ranges. Once some - * reserved ranges have been set, new reserved regions cannot be - * added outside of the original ones. - * - * @mr: IOMMU MR - * @iova_ranges: list of usable IOVA ranges - * @errp: error handle - */ -static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr, - GList *iova_ranges, - Error **errp) -{ - IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); - GList *current_ranges = sdev->host_resv_ranges; - GList *l, *tmp, *new_ranges = NULL; - int ret = -EINVAL; - - /* check that each new resv region is included in an existing one */ - if (sdev->host_resv_ranges) { - range_inverse_array(iova_ranges, - &new_ranges, - 0, UINT64_MAX); - - for (tmp = new_ranges; tmp; tmp = tmp->next) { - Range *newr = (Range *)tmp->data; - bool included = false; - - for (l = current_ranges; l; l = l->next) { - Range * r = (Range *)l->data; - - if (range_contains_range(r, newr)) { - included = true; - break; - } - } - if (!included) { - goto error; - } - } - /* all new reserved ranges are included in existing ones */ - ret = 0; - goto out; - } - - if (sdev->probe_done) { - warn_report("%s: Notified about new host reserved regions after probe", - mr->parent_obj.name); - } - - range_inverse_array(iova_ranges, - &sdev->host_resv_ranges, - 0, UINT64_MAX); - rebuild_resv_regions(sdev); - - return 0; -error: - error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!", - mr->parent_obj.name); -out: - g_list_free_full(new_ranges, g_free); - return ret; -} - static 
void virtio_iommu_system_reset(void *opaque) { VirtIOIOMMU *s = opaque; @@ -1742,7 +1676,6 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass, imrc->replay = virtio_iommu_replay; imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; - imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges; } static const TypeInfo virtio_iommu_info = { From 44079a9839bc0a682db7c0ab6093fce79c73d261 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 14 Jun 2024 11:52:57 +0200 Subject: [PATCH 24/42] hw/vfio: Remove memory_region_iommu_set_iova_ranges() call As we have just removed the only implementation of iommu_set_iova_ranges IOMMU MR callback in the virtio-iommu, let's remove the call to the memory wrapper. Usable IOVA ranges are now conveyed through the PCIIOMMUOps in VFIO-PCI. Signed-off-by: Eric Auger Reviewed-by: Zhenzhong Duan Reviewed-by: Michael S. Tsirkin --- hw/vfio/common.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index f20a7b5bba..9e4c0cc95f 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -630,16 +630,6 @@ static void vfio_listener_region_add(MemoryListener *listener, goto fail; } - if (bcontainer->iova_ranges) { - ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, - bcontainer->iova_ranges, - &err); - if (ret) { - g_free(giommu); - goto fail; - } - } - ret = memory_region_register_iommu_notifier(section->mr, &giommu->n, &err); if (ret) { From 71386c6efd7d57cc549b1c3caff889e7506c54a9 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 14 Jun 2024 11:52:58 +0200 Subject: [PATCH 25/42] memory: Remove IOMMU MR iommu_set_iova_range API Since the host IOVA ranges are now passed through the PCIIOMMUOps set_host_resv_regions and we have removed the only implementation of iommu_set_iova_range() in the virtio-iommu and the only call site in vfio/common, let's retire the IOMMU MR API and its memory wrapper. 
Signed-off-by: Eric Auger Reviewed-by: Zhenzhong Duan Reviewed-by: Michael S. Tsirkin --- include/exec/memory.h | 32 -------------------------------- system/memory.c | 13 ------------- 2 files changed, 45 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 2d7c278b9f..0903513d13 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -530,26 +530,6 @@ struct IOMMUMemoryRegionClass { int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu, uint64_t page_size_mask, Error **errp); - /** - * @iommu_set_iova_ranges: - * - * Propagate information about the usable IOVA ranges for a given IOMMU - * memory region. Used for example to propagate host physical device - * reserved memory region constraints to the virtual IOMMU. - * - * Optional method: if this method is not provided, then the default IOVA - * aperture is used. - * - * @iommu: the IOMMUMemoryRegion - * - * @iova_ranges: list of ordered IOVA ranges (at least one range) - * - * Returns 0 on success, or a negative error. In case of failure, the error - * object must be created. - */ - int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu, - GList *iova_ranges, - Error **errp); }; typedef struct RamDiscardListener RamDiscardListener; @@ -1951,18 +1931,6 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, uint64_t page_size_mask, Error **errp); -/** - * memory_region_iommu_set_iova_ranges - Set the usable IOVA ranges - * for a given IOMMU MR region - * - * @iommu: IOMMU memory region - * @iova_ranges: list of ordered IOVA ranges (at least one range) - * @errp: pointer to Error*, to store an error if it happens. 
- */ -int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu, - GList *iova_ranges, - Error **errp); - /** * memory_region_name: get a memory region's name * diff --git a/system/memory.c b/system/memory.c index 47c600df63..2d69521360 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1914,19 +1914,6 @@ int memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr, return ret; } -int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr, - GList *iova_ranges, - Error **errp) -{ - IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); - int ret = 0; - - if (imrc->iommu_set_iova_ranges) { - ret = imrc->iommu_set_iova_ranges(iommu_mr, iova_ranges, errp); - } - return ret; -} - int memory_region_register_iommu_notifier(MemoryRegion *mr, IOMMUNotifier *n, Error **errp) { From 332b9b0da409d727ac4765fa613158189562dec4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:33:53 +0200 Subject: [PATCH 26/42] vfio: Make vfio_devices_dma_logging_start() return bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since vfio_devices_dma_logging_start() takes an 'Error **' argument, best practices suggest to return a bool. See the api/error.h Rules section. It will simplify potential changes coming after. vfio_container_set_dirty_page_tracking() could be modified in the same way but the errno value can be saved in the migration stream when called from vfio_listener_log_global_stop(). 
Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/common.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 9e4c0cc95f..d48cd9b936 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1020,7 +1020,7 @@ static void vfio_device_feature_dma_logging_start_destroy( g_free(feature); } -static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, +static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, Error **errp) { struct vfio_device_feature *feature; @@ -1033,7 +1033,7 @@ static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, &ranges); if (!feature) { error_setg_errno(errp, errno, "Failed to prepare DMA logging"); - return -errno; + return false; } QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { @@ -1058,7 +1058,7 @@ out: vfio_device_feature_dma_logging_start_destroy(feature); - return ret; + return ret == 0; } static bool vfio_listener_log_global_start(MemoryListener *listener, @@ -1067,18 +1067,18 @@ static bool vfio_listener_log_global_start(MemoryListener *listener, ERRP_GUARD(); VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, listener); - int ret; + bool ret; if (vfio_devices_all_device_dirty_tracking(bcontainer)) { ret = vfio_devices_dma_logging_start(bcontainer, errp); } else { - ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp); + ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp) == 0; } - if (ret) { + if (!ret) { error_prepend(errp, "vfio: Could not start dirty page tracking - "); } - return !ret; + return ret; } static void vfio_listener_log_global_stop(MemoryListener *listener) From 889833e5ae1fe83b39e065b7386eb020af7cf304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:33:54 +0200 Subject: [PATCH 27/42] vfio: Remove unused declarations 
from vfio-common.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These were forgotten in the recent cleanups. Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- include/hw/vfio/vfio-common.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 776de8064f..c19572f90b 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -207,10 +207,6 @@ typedef struct VFIODisplay { VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); void vfio_put_address_space(VFIOAddressSpace *space); -/* SPAPR specific */ -int vfio_spapr_container_init(VFIOContainer *container, Error **errp); -void vfio_spapr_container_deinit(VFIOContainer *container); - void vfio_disable_irqindex(VFIODevice *vbasedev, int index); void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index); From 344e70945db3af08862e37a8bb10afaf4c59f88b Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Mon, 17 Jun 2024 08:33:55 +0200 Subject: [PATCH 28/42] vfio/common: Move dirty tracking ranges update to helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate the changes that update the ranges from the listener, to make it reusable in preparation to expand its use to vIOMMU support. 
Signed-off-by: Joao Martins Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger [ clg: - Rebased on upstream - Introduced vfio_dirty_tracking_update_range() - Fixed typo in commit log ] Signed-off-by: Cédric Le Goater --- hw/vfio/common.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index d48cd9b936..fe215918bd 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -839,20 +839,11 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, return false; } -static void vfio_dirty_tracking_update(MemoryListener *listener, - MemoryRegionSection *section) +static void vfio_dirty_tracking_update_range(VFIODirtyRanges *range, + hwaddr iova, hwaddr end, + bool update_pci) { - VFIODirtyRangesListener *dirty = container_of(listener, - VFIODirtyRangesListener, - listener); - VFIODirtyRanges *range = &dirty->ranges; - hwaddr iova, end, *min, *max; - - if (!vfio_listener_valid_section(section, "tracking_update") || - !vfio_get_section_iova_range(dirty->bcontainer, section, - &iova, &end, NULL)) { - return; - } + hwaddr *min, *max; /* * The address space passed to the dirty tracker is reduced to three ranges: @@ -873,8 +864,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, * The alternative would be an IOVATree but that has a much bigger runtime * overhead and unnecessary complexity.
*/ - if (vfio_section_is_vfio_pci(section, dirty->bcontainer) && - iova >= UINT32_MAX) { + if (update_pci && iova >= UINT32_MAX) { min = &range->minpci64; max = &range->maxpci64; } else { @@ -889,7 +879,23 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, } trace_vfio_device_dirty_tracking_update(iova, end, *min, *max); - return; +} + +static void vfio_dirty_tracking_update(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIODirtyRangesListener *dirty = + container_of(listener, VFIODirtyRangesListener, listener); + hwaddr iova, end; + + if (!vfio_listener_valid_section(section, "tracking_update") || + !vfio_get_section_iova_range(dirty->bcontainer, section, + &iova, &end, NULL)) { + return; + } + + vfio_dirty_tracking_update_range(&dirty->ranges, iova, end, + vfio_section_is_vfio_pci(section, dirty->bcontainer)); } static const MemoryListener vfio_dirty_tracking_listener = { From 723f702b89b9c86058a608db9dea3b5618ff284a Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Mon, 17 Jun 2024 08:33:56 +0200 Subject: [PATCH 29/42] vfio/common: Extract vIOMMU code from vfio_sync_dirty_bitmap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract vIOMMU code from vfio_sync_dirty_bitmap() to a new function and restructure the code. This is done in preparation for optimizing vIOMMU device dirty page tracking. No functional changes intended. 
Signed-off-by: Avihai Horon Signed-off-by: Joao Martins [ clg: - Rebased on upstream - Fixed typo in commit log ] Reviewed-by: Zhenzhong Duan Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/common.c | 63 +++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index fe215918bd..f28641bad5 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1302,37 +1302,50 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, &vrdl); } +static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) +{ + VFIOGuestIOMMU *giommu; + bool found = false; + Int128 llend; + vfio_giommu_dirty_notifier gdn; + int idx; + + QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if (MEMORY_REGION(giommu->iommu_mr) == section->mr && + giommu->n.start == section->offset_within_region) { + found = true; + break; + } + } + + if (!found) { + return 0; + } + + gdn.giommu = giommu; + idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr, + MEMTXATTRS_UNSPECIFIED); + + llend = int128_add(int128_make64(section->offset_within_region), + section->size); + llend = int128_sub(llend, int128_one()); + + iommu_notifier_init(&gdn.n, vfio_iommu_map_dirty_notify, IOMMU_NOTIFIER_MAP, + section->offset_within_region, int128_get64(llend), + idx); + memory_region_iommu_replay(giommu->iommu_mr, &gdn.n); + + return 0; +} + static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer, MemoryRegionSection *section, Error **errp) { ram_addr_t ram_addr; if (memory_region_is_iommu(section->mr)) { - VFIOGuestIOMMU *giommu; - - QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { - if (MEMORY_REGION(giommu->iommu_mr) == section->mr && - giommu->n.start == section->offset_within_region) { - Int128 llend; - vfio_giommu_dirty_notifier gdn = { .giommu = giommu }; - int idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr, - 
MEMTXATTRS_UNSPECIFIED); - - llend = int128_add(int128_make64(section->offset_within_region), - section->size); - llend = int128_sub(llend, int128_one()); - - iommu_notifier_init(&gdn.n, - vfio_iommu_map_dirty_notify, - IOMMU_NOTIFIER_MAP, - section->offset_within_region, - int128_get64(llend), - idx); - memory_region_iommu_replay(giommu->iommu_mr, &gdn.n); - break; - } - } - return 0; + return vfio_sync_iommu_dirty_bitmap(bcontainer, section); } else if (memory_region_has_ram_discard_manager(section->mr)) { int ret; From b7b79588ebb365e157ec2425a4fa472a314c3ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:33:57 +0200 Subject: [PATCH 30/42] vfio/container: Introduce vfio_address_space_insert() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It prepares ground for a future change initializing the 'space' pointer of VFIOContainerBase. The goal is to replace vfio_container_init() by an .instance_init() handler when VFIOContainerBase is QOMified. 
Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/common.c | 6 ++++++ hw/vfio/container.c | 2 +- hw/vfio/iommufd.c | 2 +- include/hw/vfio/vfio-common.h | 2 ++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index f28641bad5..8cdf26c6f5 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1508,6 +1508,12 @@ void vfio_put_address_space(VFIOAddressSpace *space) } } +void vfio_address_space_insert(VFIOAddressSpace *space, + VFIOContainerBase *bcontainer) +{ + QLIST_INSERT_HEAD(&space->containers, bcontainer, next); +} + struct vfio_device_info *vfio_get_device_info(int fd) { struct vfio_device_info *info; diff --git a/hw/vfio/container.c b/hw/vfio/container.c index c48749c089..0237c21698 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -637,7 +637,7 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, vfio_kvm_device_add_group(group); QLIST_INIT(&container->group_list); - QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + vfio_address_space_insert(space, bcontainer); group->container = container; QLIST_INSERT_HEAD(&container->group_list, group, container_next); diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index e502081c2a..9f8f33e383 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -358,7 +358,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, bcontainer = &container->bcontainer; vfio_container_init(bcontainer, space, iommufd_vioc); - QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + vfio_address_space_insert(space, bcontainer); if (!iommufd_cdev_attach_container(vbasedev, container, errp)) { goto err_attach_container; diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index c19572f90b..825d80130b 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -206,6 +206,8 @@ typedef struct VFIODisplay { 
VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); void vfio_put_address_space(VFIOAddressSpace *space); +void vfio_address_space_insert(VFIOAddressSpace *space, + VFIOContainerBase *bcontainer); void vfio_disable_irqindex(VFIODevice *vbasedev, int index); void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); From 09181a8e9729c77c69f8d3988a1dbed0b91402d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:33:58 +0200 Subject: [PATCH 31/42] vfio/container: Simplify vfio_container_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assign the base container VFIOAddressSpace 'space' pointer in vfio_address_space_insert(). The ultimate goal is to remove vfio_container_init() and instead rely on an .instance_init() handler to perform the initialization of VFIOContainerBase. To be noted that vfio_connect_container() will assign the 'space' pointer later in the execution flow. This should not have any consequence.
Reviewed-by: Zhenzhong Duan Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/common.c | 1 + hw/vfio/container-base.c | 3 +-- hw/vfio/container.c | 6 +++--- hw/vfio/iommufd.c | 2 +- include/hw/vfio/vfio-container-base.h | 1 - 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 8cdf26c6f5..1686a0bed2 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1512,6 +1512,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space, VFIOContainerBase *bcontainer) { QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + bcontainer->space = space; } struct vfio_device_info *vfio_get_device_info(int fd) diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 760d9d0622..280f0dd2db 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -71,11 +71,10 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, errp); } -void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, +void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUClass *ops) { bcontainer->ops = ops; - bcontainer->space = space; bcontainer->error = NULL; bcontainer->dirty_pages_supported = false; bcontainer->dma_max_mappings = 0; diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 0237c21698..dc85a79cb9 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -394,7 +394,7 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) } static bool vfio_set_iommu(VFIOContainer *container, int group_fd, - VFIOAddressSpace *space, Error **errp) + Error **errp) { int iommu_type; const VFIOIOMMUClass *vioc; @@ -432,7 +432,7 @@ static bool vfio_set_iommu(VFIOContainer *container, int group_fd, return false; } - vfio_container_init(&container->bcontainer, space, vioc); + vfio_container_init(&container->bcontainer, vioc); return true; } @@ -614,7 +614,7 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace 
*as, container->fd = fd; bcontainer = &container->bcontainer; - if (!vfio_set_iommu(container, group->fd, space, errp)) { + if (!vfio_set_iommu(container, group->fd, errp)) { goto free_container_exit; } diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 9f8f33e383..e5d9334142 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -357,7 +357,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, container->ioas_id = ioas_id; bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, space, iommufd_vioc); + vfio_container_init(bcontainer, iommufd_vioc); vfio_address_space_insert(space, bcontainer); if (!iommufd_cdev_attach_container(vbasedev, container, errp)) { diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 442c0dfc4c..d505f63607 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -87,7 +87,6 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); void vfio_container_init(VFIOContainerBase *bcontainer, - VFIOAddressSpace *space, const VFIOIOMMUClass *ops); void vfio_container_destroy(VFIOContainerBase *bcontainer); From 55974f35eac3d8166a8dee498b1efee2ef9663c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:33:59 +0200 Subject: [PATCH 32/42] vfio/container: Modify vfio_get_iommu_type() to use a container fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'container' pointer has no other use than its 'fd' attribute. Simplify the prototype to ease future changes. 
Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/container.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index dc85a79cb9..589f37bc6d 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -354,7 +354,7 @@ static void vfio_kvm_device_del_group(VFIOGroup *group) /* * vfio_get_iommu_type - selects the richest iommu_type (v2 first) */ -static int vfio_get_iommu_type(VFIOContainer *container, +static int vfio_get_iommu_type(int container_fd, Error **errp) { int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, @@ -362,7 +362,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, int i; for (i = 0; i < ARRAY_SIZE(iommu_types); i++) { - if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { + if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { return iommu_types[i]; } } @@ -399,7 +399,7 @@ static bool vfio_set_iommu(VFIOContainer *container, int group_fd, int iommu_type; const VFIOIOMMUClass *vioc; - iommu_type = vfio_get_iommu_type(container, errp); + iommu_type = vfio_get_iommu_type(container->fd, errp); if (iommu_type < 0) { return false; } From 17401879c416c48c72e5e4d805244b893d44637c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:34:00 +0200 Subject: [PATCH 33/42] vfio/container: Introduce vfio_get_iommu_class_name() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework vfio_get_iommu_class() to return a literal class name instead of a class object. We will need this name to instantiate the object later on. Since the default case asserts, remove the error report as QEMU will simply abort before. 
Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/container.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 589f37bc6d..bb6abe60ee 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -373,24 +373,20 @@ static int vfio_get_iommu_type(int container_fd, /* * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type */ -static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) +static const char *vfio_get_iommu_class_name(int iommu_type) { - ObjectClass *klass = NULL; - switch (iommu_type) { case VFIO_TYPE1v2_IOMMU: case VFIO_TYPE1_IOMMU: - klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); + return TYPE_VFIO_IOMMU_LEGACY; break; case VFIO_SPAPR_TCE_v2_IOMMU: case VFIO_SPAPR_TCE_IOMMU: - klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR); + return TYPE_VFIO_IOMMU_SPAPR; break; default: g_assert_not_reached(); }; - - return VFIO_IOMMU_CLASS(klass); } static bool vfio_set_iommu(VFIOContainer *container, int group_fd, @@ -398,6 +394,7 @@ static bool vfio_set_iommu(VFIOContainer *container, int group_fd, { int iommu_type; const VFIOIOMMUClass *vioc; + const char *vioc_name; iommu_type = vfio_get_iommu_type(container->fd, errp); if (iommu_type < 0) { @@ -426,11 +423,8 @@ static bool vfio_set_iommu(VFIOContainer *container, int group_fd, container->iommu_type = iommu_type; - vioc = vfio_get_iommu_class(iommu_type, errp); - if (!vioc) { - error_setg(errp, "No available IOMMU models"); - return false; - } + vioc_name = vfio_get_iommu_class_name(iommu_type); + vioc = VFIO_IOMMU_CLASS(object_class_by_name(vioc_name)); vfio_container_init(&container->bcontainer, vioc); return true; From 58f5c13260c2bf8fe158d0e176d1595858157946 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:34:01 +0200 Subject: [PATCH 34/42] vfio/container: Introduce 
vfio_create_container() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This routine allocates the QEMU struct type representing the VFIO container. It is minimal currently and future changes will do more initialization. Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/container.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index bb6abe60ee..a869194279 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -430,6 +430,16 @@ static bool vfio_set_iommu(VFIOContainer *container, int group_fd, return true; } +static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, + Error **errp) +{ + VFIOContainer *container; + + container = g_malloc0(sizeof(*container)); + container->fd = fd; + return container; +} + static int vfio_get_iommu_info(VFIOContainer *container, struct vfio_iommu_type1_info **info) { @@ -604,13 +614,14 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, goto close_fd_exit; } - container = g_malloc0(sizeof(*container)); - container->fd = fd; - bcontainer = &container->bcontainer; - + container = vfio_create_container(fd, group, errp); + if (!container) { + goto close_fd_exit; + } if (!vfio_set_iommu(container, group->fd, errp)) { goto free_container_exit; } + bcontainer = &container->bcontainer; if (!vfio_cpr_register_container(bcontainer, errp)) { goto free_container_exit; From 9550fdfd29f52d548d99aed2b1a002308ad6175a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:34:02 +0200 Subject: [PATCH 35/42] vfio/container: Discover IOMMU type before creating the container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the QEMU struct type representing the VFIO container is deduced from the IOMMU type exposed by the host, this type 
should be well defined *before* creating the container struct. This will be necessary to instantiate a QOM object of the correct type in future changes. Rework vfio_set_iommu() to extract the part doing the container initialization and move it under vfio_create_container(). Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/container.c | 47 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index a869194279..31bdc46a96 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -389,54 +389,56 @@ static const char *vfio_get_iommu_class_name(int iommu_type) }; } -static bool vfio_set_iommu(VFIOContainer *container, int group_fd, - Error **errp) +static bool vfio_set_iommu(int container_fd, int group_fd, + int *iommu_type, Error **errp) { - int iommu_type; - const VFIOIOMMUClass *vioc; - const char *vioc_name; - - iommu_type = vfio_get_iommu_type(container->fd, errp); - if (iommu_type < 0) { - return false; - } - - if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd)) { error_setg_errno(errp, errno, "Failed to set group container"); return false; } - while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) { - if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { + while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) { + if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { /* * On sPAPR, despite the IOMMU subdriver always advertises v1 and * v2, the running platform may not support v2 and there is no * way to guess it until an IOMMU group gets added to the container. * So in case it fails with v2, try v1 as a fallback. 
*/ - iommu_type = VFIO_SPAPR_TCE_IOMMU; + *iommu_type = VFIO_SPAPR_TCE_IOMMU; continue; } error_setg_errno(errp, errno, "Failed to set iommu for container"); return false; } - container->iommu_type = iommu_type; - - vioc_name = vfio_get_iommu_class_name(iommu_type); - vioc = VFIO_IOMMU_CLASS(object_class_by_name(vioc_name)); - - vfio_container_init(&container->bcontainer, vioc); return true; } static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, Error **errp) { + int iommu_type; + const VFIOIOMMUClass *vioc; + const char *vioc_name; VFIOContainer *container; + iommu_type = vfio_get_iommu_type(fd, errp); + if (iommu_type < 0) { + return NULL; + } + + if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { + return NULL; + } + + vioc_name = vfio_get_iommu_class_name(iommu_type); + vioc = VFIO_IOMMU_CLASS(object_class_by_name(vioc_name)); + container = g_malloc0(sizeof(*container)); container->fd = fd; + container->iommu_type = iommu_type; + vfio_container_init(&container->bcontainer, vioc); return container; } @@ -618,9 +620,6 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (!container) { goto close_fd_exit; } - if (!vfio_set_iommu(container, group->fd, errp)) { - goto free_container_exit; - } bcontainer = &container->bcontainer; if (!vfio_cpr_register_container(bcontainer, errp)) { From 504d297e10fdfe1b1243274e334abb0074ee69f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:34:03 +0200 Subject: [PATCH 36/42] vfio/container: Change VFIOContainerBase to use QOM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VFIOContainerBase was made a QOM interface because we believed that a QOM object would expose all the IOMMU backends to the QEMU machine and human interface. This only applies to user creatable devices or objects. 
Change the VFIOContainerBase nature from interface to object and make the necessary adjustments in the VFIO_IOMMU hierarchy. Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/container-base.c | 4 +++- hw/vfio/container.c | 1 + hw/vfio/iommufd.c | 1 + hw/vfio/spapr.c | 3 +++ include/hw/vfio/vfio-common.h | 4 ++++ include/hw/vfio/vfio-container-base.h | 12 +++--------- 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 280f0dd2db..98c15e174d 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -102,8 +102,10 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU, - .parent = TYPE_INTERFACE, + .parent = TYPE_OBJECT, + .instance_size = sizeof(VFIOContainerBase), .class_size = sizeof(VFIOIOMMUClass), + .abstract = true, }, }; diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 31bdc46a96..3ae52530a9 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -1196,6 +1196,7 @@ static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_LEGACY, .parent = TYPE_VFIO_IOMMU, + .instance_size = sizeof(VFIOContainer), .class_init = vfio_iommu_legacy_class_init, }, { .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO, diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index e5d9334142..3e9d642034 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -672,6 +672,7 @@ static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_IOMMUFD, .parent = TYPE_VFIO_IOMMU, + .instance_size = sizeof(VFIOIOMMUFDContainer), .class_init = vfio_iommu_iommufd_class_init, }, { .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 47b040f1bc..018bd20481 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -30,6 +30,8 @@ typedef struct VFIOSpaprContainer { QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; } VFIOSpaprContainer; 
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOSpaprContainer, VFIO_IOMMU_SPAPR); + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) { if (memory_region_is_iommu(section->mr)) { @@ -548,6 +550,7 @@ static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_SPAPR, .parent = TYPE_VFIO_IOMMU_LEGACY, + .instance_size = sizeof(VFIOSpaprContainer), .class_init = vfio_iommu_spapr_class_init, }, }; diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 825d80130b..e8ddf92bb1 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -84,6 +84,8 @@ typedef struct VFIOContainer { QLIST_HEAD(, VFIOGroup) group_list; } VFIOContainer; +OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY); + typedef struct VFIOHostDMAWindow { hwaddr min_iova; hwaddr max_iova; @@ -99,6 +101,8 @@ typedef struct VFIOIOMMUFDContainer { uint32_t ioas_id; } VFIOIOMMUFDContainer; +OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD); + typedef struct VFIODeviceOps VFIODeviceOps; typedef struct VFIODevice { diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index d505f63607..b079b76f68 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -34,6 +34,7 @@ typedef struct VFIOAddressSpace { * This is the base object for vfio container backends */ typedef struct VFIOContainerBase { + Object parent; const VFIOIOMMUClass *ops; VFIOAddressSpace *space; MemoryListener listener; @@ -96,17 +97,10 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" -/* - * VFIOContainerBase is not an abstract QOM object because it felt - * unnecessary to expose all the IOMMU backends to the QEMU machine - * and human interface. However, we can still abstract the IOMMU - * backend handlers using a QOM interface class. 
This provides more - * flexibility when referencing the various implementations. - */ -DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU) +OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU) struct VFIOIOMMUClass { - InterfaceClass parent_class; + ObjectClass parent_class; /* Properties */ const char *hiod_typename; From 938026053f43031319d5e2159dcf4f993519afef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:34:04 +0200 Subject: [PATCH 37/42] vfio/container: Switch to QOM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of allocating the container struct, create a QOM object of the appropriate type. Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/container.c | 6 +++--- hw/vfio/iommufd.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 3ae52530a9..ff3a6831da 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -435,7 +435,7 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, vioc_name = vfio_get_iommu_class_name(iommu_type); vioc = VFIO_IOMMU_CLASS(object_class_by_name(vioc_name)); - container = g_malloc0(sizeof(*container)); + container = VFIO_IOMMU_LEGACY(object_new(vioc_name)); container->fd = fd; container->iommu_type = iommu_type; vfio_container_init(&container->bcontainer, vioc); @@ -674,7 +674,7 @@ unregister_container_exit: vfio_cpr_unregister_container(bcontainer); free_container_exit: - g_free(container); + object_unref(container); close_fd_exit: close(fd); @@ -718,7 +718,7 @@ static void vfio_disconnect_container(VFIOGroup *group) trace_vfio_disconnect_container(container->fd); vfio_cpr_unregister_container(bcontainer); close(container->fd); - g_free(container); + object_unref(container); vfio_put_address_space(space); } diff --git a/hw/vfio/iommufd.c 
b/hw/vfio/iommufd.c index 3e9d642034..d59df85840 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -239,7 +239,7 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) memory_listener_unregister(&bcontainer->listener); vfio_container_destroy(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); - g_free(container); + object_unref(container); } static int iommufd_cdev_ram_block_discard_disable(bool state) @@ -352,7 +352,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); - container = g_malloc0(sizeof(*container)); + container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD)); container->be = vbasedev->iommufd; container->ioas_id = ioas_id; From 2137d2fd1779df61ae011186b0f3a8ecb9ca0a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:34:05 +0200 Subject: [PATCH 38/42] vfio/container: Introduce an instance_init() handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to move the initialization code from vfio_container_init(), which we will soon remove. 
Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/container-base.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 98c15e174d..3858f5ab1d 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -75,12 +75,6 @@ void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUClass *ops) { bcontainer->ops = ops; - bcontainer->error = NULL; - bcontainer->dirty_pages_supported = false; - bcontainer->dma_max_mappings = 0; - bcontainer->iova_ranges = NULL; - QLIST_INIT(&bcontainer->giommu_list); - QLIST_INIT(&bcontainer->vrdl_list); } void vfio_container_destroy(VFIOContainerBase *bcontainer) @@ -99,10 +93,23 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) g_list_free_full(bcontainer->iova_ranges, g_free); } +static void vfio_container_instance_init(Object *obj) +{ + VFIOContainerBase *bcontainer = VFIO_IOMMU(obj); + + bcontainer->error = NULL; + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; + bcontainer->iova_ranges = NULL; + QLIST_INIT(&bcontainer->giommu_list); + QLIST_INIT(&bcontainer->vrdl_list); +} + static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU, .parent = TYPE_OBJECT, + .instance_init = vfio_container_instance_init, .instance_size = sizeof(VFIOContainerBase), .class_size = sizeof(VFIOIOMMUClass), .abstract = true, From 41d698b8d63b719c5b32bd056109be272f6dd740 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:34:06 +0200 Subject: [PATCH 39/42] vfio/container: Remove VFIOContainerBase::ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead, use VFIO_IOMMU_GET_CLASS() to get the class pointer. 
Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/common.c | 2 +- hw/vfio/container-base.c | 37 +++++++++++++++++---------- hw/vfio/container.c | 15 ++++++----- hw/vfio/iommufd.c | 4 +-- hw/vfio/pci.c | 4 +-- include/hw/vfio/vfio-container-base.h | 1 - 6 files changed, 38 insertions(+), 25 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 1686a0bed2..7cdb969fd3 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -1573,5 +1573,5 @@ void vfio_detach_device(VFIODevice *vbasedev) return; } object_unref(vbasedev->hiod); - vbasedev->bcontainer->ops->detach_device(vbasedev); + VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev); } diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 3858f5ab1d..24669d4d74 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -19,62 +19,73 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly) { - g_assert(bcontainer->ops->dma_map); - return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly); + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + g_assert(vioc->dma_map); + return vioc->dma_map(bcontainer, iova, size, vaddr, readonly); } int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb) { - g_assert(bcontainer->ops->dma_unmap); - return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + g_assert(vioc->dma_unmap); + return vioc->dma_unmap(bcontainer, iova, size, iotlb); } bool vfio_container_add_section_window(VFIOContainerBase *bcontainer, MemoryRegionSection *section, Error **errp) { - if (!bcontainer->ops->add_window) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + if (!vioc->add_window) { return true; } - return bcontainer->ops->add_window(bcontainer, section, 
errp); + return vioc->add_window(bcontainer, section, errp); } void vfio_container_del_section_window(VFIOContainerBase *bcontainer, MemoryRegionSection *section) { - if (!bcontainer->ops->del_window) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + if (!vioc->del_window) { return; } - return bcontainer->ops->del_window(bcontainer, section); + return vioc->del_window(bcontainer, section); } int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, bool start, Error **errp) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + if (!bcontainer->dirty_pages_supported) { return 0; } - g_assert(bcontainer->ops->set_dirty_page_tracking); - return bcontainer->ops->set_dirty_page_tracking(bcontainer, start, errp); + g_assert(vioc->set_dirty_page_tracking); + return vioc->set_dirty_page_tracking(bcontainer, start, errp); } int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) { - g_assert(bcontainer->ops->query_dirty_bitmap); - return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size, + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + g_assert(vioc->query_dirty_bitmap); + return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size, errp); } void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUClass *ops) { - bcontainer->ops = ops; } void vfio_container_destroy(VFIOContainerBase *bcontainer) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index ff3a6831da..a2f5fbad00 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -548,6 +548,7 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, VFIOContainerBase *bcontainer; int ret, fd; VFIOAddressSpace *space; + VFIOIOMMUClass *vioc; space = vfio_get_address_space(as); @@ -632,9 +633,10 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, goto unregister_container_exit; } - assert(bcontainer->ops->setup); + 
vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + assert(vioc->setup); - if (!bcontainer->ops->setup(bcontainer, errp)) { + if (!vioc->setup(bcontainer, errp)) { goto enable_discards_exit; } @@ -663,8 +665,8 @@ listener_release_exit: QLIST_REMOVE(bcontainer, next); vfio_kvm_device_del_group(group); memory_listener_unregister(&bcontainer->listener); - if (bcontainer->ops->release) { - bcontainer->ops->release(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); } enable_discards_exit: @@ -689,6 +691,7 @@ static void vfio_disconnect_container(VFIOGroup *group) { VFIOContainer *container = group->container; VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); QLIST_REMOVE(group, container_next); group->container = NULL; @@ -700,8 +703,8 @@ static void vfio_disconnect_container(VFIOGroup *group) */ if (QLIST_EMPTY(&container->group_list)) { memory_listener_unregister(&bcontainer->listener); - if (bcontainer->ops->release) { - bcontainer->ops->release(bcontainer); + if (vioc->release) { + vioc->release(bcontainer); } } diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index d59df85840..7bc76f80b4 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -324,7 +324,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, /* try to attach to an existing container in this space */ QLIST_FOREACH(bcontainer, &space->containers, next) { container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); - if (bcontainer->ops != iommufd_vioc || + if (VFIO_IOMMU_GET_CLASS(bcontainer) != iommufd_vioc || vbasedev->iommufd != container->be) { continue; } @@ -465,7 +465,7 @@ static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { - if (vbasedev_iter->bcontainer->ops != iommufd_vioc) { + if (VFIO_IOMMU_GET_CLASS(vbasedev_iter->bcontainer) != iommufd_vioc) { 
continue; } if (devid == vbasedev_iter->devid) { diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index d8a76c1ee0..e03d9f3ba5 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2511,9 +2511,9 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) { VFIODevice *vbasedev = &vdev->vbasedev; - const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops; + const VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer); - return ops->pci_hot_reset(vbasedev, single); + return vioc->pci_hot_reset(vbasedev, single); } /* diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index b079b76f68..6b57cd8e7f 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -35,7 +35,6 @@ typedef struct VFIOAddressSpace { */ typedef struct VFIOContainerBase { Object parent; - const VFIOIOMMUClass *ops; VFIOAddressSpace *space; MemoryListener listener; Error *error; From 2f7243cb8a3184cc26c70805e5aaec07fac943d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:34:07 +0200 Subject: [PATCH 40/42] vfio/container: Remove vfio_container_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's now empty. 
Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/container-base.c | 5 ----- hw/vfio/container.c | 3 --- hw/vfio/iommufd.c | 1 - include/hw/vfio/vfio-container-base.h | 2 -- 4 files changed, 11 deletions(-) diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 24669d4d74..970ae2356a 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -83,11 +83,6 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, errp); } -void vfio_container_init(VFIOContainerBase *bcontainer, - const VFIOIOMMUClass *ops) -{ -} - void vfio_container_destroy(VFIOContainerBase *bcontainer) { VFIOGuestIOMMU *giommu, *tmp; diff --git a/hw/vfio/container.c b/hw/vfio/container.c index a2f5fbad00..3f2032d5c4 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -419,7 +419,6 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, Error **errp) { int iommu_type; - const VFIOIOMMUClass *vioc; const char *vioc_name; VFIOContainer *container; @@ -433,12 +432,10 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, } vioc_name = vfio_get_iommu_class_name(iommu_type); - vioc = VFIO_IOMMU_CLASS(object_class_by_name(vioc_name)); container = VFIO_IOMMU_LEGACY(object_new(vioc_name)); container->fd = fd; container->iommu_type = iommu_type; - vfio_container_init(&container->bcontainer, vioc); return container; } diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 7bc76f80b4..09b71a6617 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -357,7 +357,6 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, container->ioas_id = ioas_id; bcontainer = &container->bcontainer; - vfio_container_init(bcontainer, iommufd_vioc); vfio_address_space_insert(space, bcontainer); if (!iommufd_cdev_attach_container(vbasedev, container, errp)) { diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h index 6b57cd8e7f..6242a62771 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -86,8 +86,6 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); -void vfio_container_init(VFIOContainerBase *bcontainer, - const VFIOIOMMUClass *ops); void vfio_container_destroy(VFIOContainerBase *bcontainer); From b052f73cbec3bf593a04b2f5cdaf3569256859a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:34:08 +0200 Subject: [PATCH 41/42] vfio/container: Introduce vfio_iommu_legacy_instance_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just as we did for the VFIOContainerBase object, introduce an instance_init() handler for the legacy VFIOContainer object and do the specific initialization there. 
Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/container.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 3f2032d5c4..45123acbdd 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -639,7 +639,6 @@ static bool vfio_connect_container(VFIOGroup *group, AddressSpace *as, vfio_kvm_device_add_group(group); - QLIST_INIT(&container->group_list); vfio_address_space_insert(space, bcontainer); group->container = container; @@ -1183,6 +1182,13 @@ hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp) return l; } +static void vfio_iommu_legacy_instance_init(Object *obj) +{ + VFIOContainer *container = VFIO_IOMMU_LEGACY(obj); + + QLIST_INIT(&container->group_list); +} + static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data) { HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc); @@ -1196,6 +1202,7 @@ static const TypeInfo types[] = { { .name = TYPE_VFIO_IOMMU_LEGACY, .parent = TYPE_VFIO_IOMMU, + .instance_init = vfio_iommu_legacy_instance_init, .instance_size = sizeof(VFIOContainer), .class_init = vfio_iommu_legacy_class_init, }, { From 96b7af4388b38bc1f66467a9c7c8ee9d3bff500f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Mon, 17 Jun 2024 08:34:09 +0200 Subject: [PATCH 42/42] vfio/container: Move vfio_container_destroy() to an instance_finalize() handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vfio_container_destroy() clears the resources allocated by the VFIOContainerBase object. Now that VFIOContainerBase is a QOM object, add an instance_finalize() handler to do the cleanup. It will be called through object_unref().
Suggested-by: Zhenzhong Duan Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Cédric Le Goater --- hw/vfio/container-base.c | 4 +++- hw/vfio/container.c | 2 -- hw/vfio/iommufd.c | 1 - include/hw/vfio/vfio-container-base.h | 3 --- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 970ae2356a..50b1664f89 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -83,8 +83,9 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, errp); } -void vfio_container_destroy(VFIOContainerBase *bcontainer) +static void vfio_container_instance_finalize(Object *obj) { + VFIOContainerBase *bcontainer = VFIO_IOMMU(obj); VFIOGuestIOMMU *giommu, *tmp; QLIST_REMOVE(bcontainer, next); @@ -116,6 +117,7 @@ static const TypeInfo types[] = { .name = TYPE_VFIO_IOMMU, .parent = TYPE_OBJECT, .instance_init = vfio_container_instance_init, + .instance_finalize = vfio_container_instance_finalize, .instance_size = sizeof(VFIOContainerBase), .class_size = sizeof(VFIOIOMMUClass), .abstract = true, diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 45123acbdd..2e7ecdf10e 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -712,8 +712,6 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = bcontainer->space; - vfio_container_destroy(bcontainer); - trace_vfio_disconnect_container(container->fd); vfio_cpr_unregister_container(bcontainer); close(container->fd); diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 09b71a6617..c2f158e603 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -237,7 +237,6 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) return; } memory_listener_unregister(&bcontainer->listener); - vfio_container_destroy(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); object_unref(container); 
} diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 6242a62771..419e45ee7a 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -86,9 +86,6 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); -void vfio_container_destroy(VFIOContainerBase *bcontainer); - - #define TYPE_VFIO_IOMMU "vfio-iommu" #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"